Parameters vs Hyperparameters
Parameters are learned from data during training (e.g., neural network weights). Hyperparameters are set before training and control the learning process.
# Examples of hyperparameters:
# Random Forest
# - n_estimators: number of trees
# - max_depth: tree depth
# - min_samples_split: minimum samples to split
# Neural Networks
# - learning_rate: step size for gradient descent
# - batch_size: samples per gradient update
# - num_layers: network depth
# XGBoost
# - learning_rate: shrinkage
# - max_depth: tree depth
# - n_estimators: boosting rounds
# Why tune?
# Default hyperparameters rarely give the best results
# Proper tuning can improve accuracy by 5-20%!
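To make the distinction concrete, here is a minimal sketch using scikit-learn's LogisticRegression (any estimator works the same way): constructor arguments are hyperparameters you choose, while fitted attributes are parameters learned from the data.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=200, random_state=42)

# Hyperparameter: set by you before training
model = LogisticRegression(C=0.1)  # C = inverse regularization strength

# Parameters: learned from the data during training
model.fit(X, y)
print(model.coef_, model.intercept_)  # fitted weights and bias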
Grid Search
Exhaustively search through all combinations of specified hyperparameters.
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
# Define parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
# This will try 3 * 4 * 3 * 3 = 108 combinations (540 model fits with 5-fold CV)!
model = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    model,
    param_grid,
    cv=5,          # 5-fold cross-validation
    scoring='f1',  # Metric to optimize
    n_jobs=-1,     # Use all CPU cores
    verbose=2
)
grid_search.fit(X_train, y_train)
# Best results
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best score: {grid_search.best_score_:.4f}")
# Use best model
best_model = grid_search.best_estimator_
predictions = best_model.predict(X_test)
# Pros: Thorough, guaranteed to find best in grid
# Cons: Very slow, exponential growth with parameters
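When the full grid is too slow, scikit-learn also offers successive halving, which starts all candidates on a small budget and keeps only the best performers for larger budgets. A minimal sketch reusing the param_grid above (HalvingGridSearchCV is still experimental, hence the extra enabling import):

from sklearn.experimental import enable_halving_search_cv  # noqa: F401 (enables the class below)
from sklearn.model_selection import HalvingGridSearchCV

halving_search = HalvingGridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    factor=3,      # keep roughly the top 1/3 of candidates each round
    cv=5,
    scoring='f1',
    n_jobs=-1
)
halving_search.fit(X_train, y_train)
print(halving_search.best_params_)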
Random Search
Randomly sample from parameter distributions. Often better than grid search!
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform
# Define parameter distributions
param_dist = {
    'n_estimators': randint(100, 500),
    'max_depth': randint(3, 20),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 10),
    'max_features': uniform(0.1, 0.9)  # Continuous: uniform(loc, scale) samples from [0.1, 1.0]
}
random_search = RandomizedSearchCV(
    RandomForestClassifier(random_state=42),
    param_distributions=param_dist,
    n_iter=50,     # Number of random combinations to sample
    cv=5,
    scoring='f1',
    n_jobs=-1,
    random_state=42,
    verbose=2
)
random_search.fit(X_train, y_train)
print(f"Best parameters: {random_search.best_params_}")
print(f"Best score: {random_search.best_score_:.4f}")
# Why Random Search is often better:
# - Same compute budget explores more values per parameter
# - Some parameters matter more than others
# - Not all combinations need to be tested
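Both search classes expose cv_results_, which is useful for seeing how scores varied across the sampled configurations; a small sketch with pandas:

import pandas as pd

# Rank all 50 sampled configurations by mean cross-validated score
results = pd.DataFrame(random_search.cv_results_)
cols = ['param_n_estimators', 'param_max_depth', 'mean_test_score', 'std_test_score']
print(results.sort_values('mean_test_score', ascending=False)[cols].head(10))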
Bayesian Optimization with Optuna
Intelligently explore the parameter space using a probabilistic model that learns from previous trials. A state-of-the-art approach.
# pip install optuna
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def objective(trial):
    # Define hyperparameter search space
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
        'max_features': trial.suggest_float('max_features', 0.1, 1.0)
    }
    model = RandomForestClassifier(**params, random_state=42)
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='f1')
    return score.mean()
# Create study and optimize
study = optuna.create_study(direction='maximize') # Maximize F1
study.optimize(objective, n_trials=100, show_progress_bar=True)
# Results
print(f"Best trial: {study.best_trial.params}")
print(f"Best score: {study.best_value:.4f}")
# Train final model with best params
best_model = RandomForestClassifier(**study.best_trial.params, random_state=42)
best_model.fit(X_train, y_train)
# Visualize optimization (these return Plotly figures; .show() renders them outside notebooks)
optuna.visualization.plot_optimization_history(study).show()
optuna.visualization.plot_param_importances(study).show()
Optuna for Deep Learning
import optuna
import tensorflow as tf
from tensorflow import keras
def create_model(trial):
    # Architecture hyperparameters
    n_layers = trial.suggest_int('n_layers', 1, 4)
    units = trial.suggest_int('units', 32, 256)
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    model = keras.Sequential()
    model.add(keras.layers.Input(shape=(X_train.shape[1],)))
    for i in range(n_layers):
        model.add(keras.layers.Dense(units, activation='relu'))
        model.add(keras.layers.Dropout(dropout))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model
def objective(trial):
    model = create_model(trial)
    # Training hyperparameters
    batch_size = trial.suggest_int('batch_size', 16, 128)
    # Pruning callback (stops unpromising trials early;
    # in recent Optuna versions this lives in the separate optuna-integration package)
    pruning_callback = optuna.integration.TFKerasPruningCallback(
        trial, 'val_accuracy'
    )
    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=[pruning_callback],
        verbose=0
    )
    return max(history.history['val_accuracy'])
study = optuna.create_study(
    direction='maximize',
    pruner=optuna.pruners.MedianPruner()  # Early-stops bad trials
)
study.optimize(objective, n_trials=50)
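After optimization, it is worth checking how many trials the pruner actually cut short; for example:

from optuna.trial import TrialState

pruned = [t for t in study.trials if t.state == TrialState.PRUNED]
complete = [t for t in study.trials if t.state == TrialState.COMPLETE]
print(f"Pruned trials: {len(pruned)}, completed trials: {len(complete)}")
print(f"Best val_accuracy: {study.best_value:.4f}")
print(f"Best hyperparameters: {study.best_params}")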
XGBoost Tuning Guide
import xgboost as xgb
import optuna
from sklearn.model_selection import cross_val_score

def objective(trial):
    params = {
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'booster': 'gbtree',
        # Most important parameters
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        # Regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10, log=True),
        # Tree-specific
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
    }
    model = xgb.XGBClassifier(**params, random_state=42, n_jobs=-1)
    score = cross_val_score(model, X_train, y_train, cv=5, scoring='roc_auc')
    return score.mean()
# Tuning order recommendation:
# 1. n_estimators, learning_rate (most important)
# 2. max_depth, min_child_weight
# 3. subsample, colsample_bytree
# 4. reg_alpha, reg_lambda (regularization)
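The objective above still needs a study to drive it. One way to run it and refit the winner (note that the fixed keys 'objective', 'eval_metric', and 'booster' are not suggested by the trial, so they are not in study.best_params and must be re-added):

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)
print(f"Best AUC: {study.best_value:.4f}")

# Refit the final model with the winning hyperparameters
final_model = xgb.XGBClassifier(
    **study.best_params,
    objective='binary:logistic',
    eval_metric='auc',
    random_state=42,
    n_jobs=-1
)
final_model.fit(X_train, y_train)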
Practical Tips
- Start with defaults: Get a baseline before tuning
- Focus on important parameters: Not all hyperparameters matter equally
- Use Random Search first: Quick exploration of the search space
- Then Bayesian optimization: For fine-tuning promising regions
- Always use cross-validation: Single train-test split is unreliable
- Set a time budget: Diminishing returns kick in after a certain point
- Log your experiments: Use MLflow or Weights & Biases (see the sketch below)
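For the last tip, a minimal MLflow sketch (assuming mlflow is installed and an Optuna study like the ones above):

import mlflow

with mlflow.start_run(run_name="rf-tuning"):
    mlflow.log_params(study.best_params)          # winning hyperparameters
    mlflow.log_metric("cv_f1", study.best_value)  # best cross-validated score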
Common Parameter Ranges
# Random Forest
param_dist_rf = {
    'n_estimators': (100, 500),
    'max_depth': (5, 30),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 10),
    'max_features': ['sqrt', 'log2', 0.3, 0.5, 0.7]
}
# XGBoost / LightGBM
param_dist_xgb = {
    'learning_rate': (0.01, 0.3),
    'n_estimators': (100, 1000),
    'max_depth': (3, 10),
    'min_child_weight': (1, 10),
    'subsample': (0.5, 1.0),
    'colsample_bytree': (0.5, 1.0),
    'reg_alpha': (1e-8, 10),   # Log scale
    'reg_lambda': (1e-8, 10)   # Log scale
}
# Neural Networks
param_dist_nn = {
    'learning_rate': (1e-5, 1e-2),  # Log scale
    'batch_size': [16, 32, 64, 128],
    'hidden_layers': (1, 5),
    'units_per_layer': (32, 512),
    'dropout': (0.0, 0.5)
}
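The ranges marked "Log scale" should not be sampled uniformly; with RandomizedSearchCV, scipy's loguniform distribution handles this, for example:

from scipy.stats import loguniform

# Replace the log-scale tuples with proper log-uniform distributions
param_dist_xgb['reg_alpha'] = loguniform(1e-8, 10)
param_dist_xgb['reg_lambda'] = loguniform(1e-8, 10)
param_dist_nn['learning_rate'] = loguniform(1e-5, 1e-2)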