# Hyperparameter optimization for a Random Forest classifier using the Optuna library
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
optuna.logging.set_verbosity(optuna.logging.WARNING)

SEED = 42  # single seed reused for the dataset, the split, and every model
# Generate a small synthetic binary classification dataset
X, y = make_classification(
    n_samples=250,
    n_features=10,
    n_informative=5,
    n_redundant=3,
    random_state=SEED,
    shuffle=True
)
# Hold out 20% of the data for final evaluation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=SEED
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
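# With 250 samples and test_size=0.20 this should print:
# (200, 10) (50, 10) (200,) (50,)
# If class balance matters, train_test_split also accepts stratify=y.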
def objective(trial):
    # Number of trees in the random forest
    n_estimators = trial.suggest_int(name="n_estimators", low=100, high=500, step=100)
    # Number of features to consider at every split
    # ('auto' was removed in scikit-learn 1.3; 'sqrt' and 'log2' remain valid)
    max_features = trial.suggest_categorical(name="max_features", choices=['sqrt', 'log2'])
    # Maximum number of levels in each tree
    max_depth = trial.suggest_int(name="max_depth", low=10, high=110, step=20)
    # Minimum number of samples required to split a node
    min_samples_split = trial.suggest_int(name="min_samples_split", low=2, high=10, step=2)
    # Minimum number of samples required at each leaf node
    min_samples_leaf = trial.suggest_int(name="min_samples_leaf", low=1, high=4, step=1)

    params = {
        "n_estimators": n_estimators,
        "max_features": max_features,
        "max_depth": max_depth,
        "min_samples_split": min_samples_split,
        "min_samples_leaf": min_samples_leaf
    }

    model = RandomForestClassifier(random_state=SEED, **params)
    # 5-fold cross-validated accuracy on the training set
    cv_score = cross_val_score(model, X_train, y_train, scoring="accuracy", n_jobs=4, cv=5)
    mean_cv_accuracy = cv_score.mean()
    return mean_cv_accuracy
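# Note: the objective above optimizes cross-validated accuracy; passing a
# different scorer to cross_val_score (e.g. scoring="f1") would make Optuna
# tune for that metric instead.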
# The objective returns accuracy, so maximize it (create_study minimizes by default)
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)
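# Inspect the best trial found (best_value is the mean CV accuracy).
# For a reproducible, more thorough search, a seeded sampler and a larger trial
# budget could be used instead, e.g. (sketch; sampler choice and trial count are
# assumptions, not part of the original setup):
#   study = optuna.create_study(
#       direction="maximize",
#       sampler=optuna.samplers.TPESampler(seed=SEED),
#   )
#   study.optimize(objective, n_trials=50)
print("Best CV accuracy:", study.best_value)
print("Best hyperparameters:", study.best_params)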
# Train a new model using the best parameters
best_model = RandomForestClassifier(random_state=SEED, **study.best_params)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)
test_precision, test_recall, test_f1, _ = precision_recall_fscore_support(
    y_test,
    y_pred,
    average='binary'
)
print("test_accuracy:", test_acc)
print("test_precision:", test_precision)
print("test_recall:", test_recall)
print("test_f1_score:", test_f1)