MLflow Quickstart
import mlflow
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score
EXPERIMENT_NAME = 'mlflow-experiment'
EXPERIMENT_ID = mlflow.create_experiment(EXPERIMENT_NAME)
X_train, y_train, X_test, y_test = ...
depth = ...
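# Illustrative only (an assumption, not part of the original quickstart): one way
# to fill in the placeholders above, using scikit-learn's built-in breast-cancer
# dataset and an arbitrary depth.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
depth = 3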
model = DecisionTreeClassifier(max_depth=depth)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
balanced_acc = balanced_accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
RUN_NAME = 'run-1'
with mlflow.start_run(experiment_id=EXPERIMENT_ID, run_name=RUN_NAME) as run:
    # Track parameters
    mlflow.log_param('depth', depth)
    # Track metrics
    mlflow.log_metric('accuracy', acc)
    mlflow.log_metrics({'balanced_acc': balanced_acc, 'f1': f1})
    # Track model
    mlflow.sklearn.log_model(model, 'dt-classifier')
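# Illustrative sketch (an assumption, not in the original): the comparison code
# below is most useful when the experiment contains several runs, e.g. one run
# per candidate depth.
for candidate_depth in [2, 3, 5, 10]:
    candidate = DecisionTreeClassifier(max_depth=candidate_depth).fit(X_train, y_train)
    candidate_pred = candidate.predict(X_test)
    with mlflow.start_run(experiment_id=EXPERIMENT_ID, run_name=f'run-depth-{candidate_depth}'):
        mlflow.log_param('depth', candidate_depth)
        mlflow.log_metric('accuracy', accuracy_score(y_test, candidate_pred))
        mlflow.sklearn.log_model(candidate, 'dt-classifier')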
# Launch the MLflow web UI, accessible at http://localhost:5000
# !mlflow ui
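# Note (illustrative): by default the runs above are written to the local ./mlruns
# folder, which is what `mlflow ui` serves. To log to a separate tracking server
# instead, point MLflow at it before starting runs (the URI below is an example):
# mlflow.set_tracking_uri('http://localhost:5000')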
# Retrieve experiment/run results programmatically and compare them
client = mlflow.tracking.MlflowClient()
# Retrieve Experiment information
EXPERIMENT_ID = client.get_experiment_by_name(EXPERIMENT_NAME).experiment_id
# Retrieve Runs information (parameter 'depth' and metric 'accuracy' for each run)
ALL_RUNS = client.search_runs(experiment_ids=[EXPERIMENT_ID])
ALL_RUNS_ID = [run.info.run_id for run in ALL_RUNS]
ALL_PARAM = [client.get_run(run_id).data.params['depth'] for run_id in ALL_RUNS_ID]
ALL_METRIC = [client.get_run(run_id).data.metrics['accuracy'] for run_id in ALL_RUNS_ID]
# View Runs information
df = pd.DataFrame({'run_id': ALL_RUNS_ID, 'params': ALL_PARAM, 'metrics': ALL_METRIC})
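# Optional (illustrative): inspect the runs side by side, best accuracy first.
print(df.sort_values('metrics', ascending=False))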
# Retrieve Artifact from best run
best_run_id = df.sort_values('metrics', ascending=False).iloc[0]['run_id']
best_model_path = client.download_artifacts(best_run_id, 'dt-classifier')
best_model = mlflow.sklearn.load_model(best_model_path)
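# Illustrative check: the reloaded model behaves like any fitted scikit-learn
# estimator, so it can be re-scored on the held-out test set.
print(accuracy_score(y_test, best_model.predict(X_test)))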