MLflow Quickstart

import mlflow
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score

EXPERIMENT_NAME = 'mlflow-experiment'
EXPERIMENT_ID = mlflow.create_experiment(EXPERIMENT_NAME)
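# Note: create_experiment raises if an experiment with this name already exists.
# A commonly used alternative is mlflow.set_experiment(EXPERIMENT_NAME), which reuses
# (or creates) the experiment; in recent MLflow versions it returns the Experiment object,
# so the id can be read from its experiment_id attribute.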

X_train, y_train, X_test, y_test = ...
depth = ...

model = DecisionTreeClassifier(max_depth=depth)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
balanced_acc = balanced_accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)


RUN_NAME = 'run-1'

with mlflow.start_run(experiment_id=EXPERIMENT_ID, run_name=RUN_NAME) as run:
  # Track parameters
  mlflow.log_param('depth', depth)

  # Track metrics
  mlflow.log_metric('accuracy', acc)
  mlflow.log_metrics({'balanced_acc': balanced_acc, 'f1': f1})

  # Track model
  mlflow.sklearn.log_model(model, 'dt-classifier')
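
  # Optionally tag the run for easier filtering in the UI
  # (the tag key/value here are illustrative, not an MLflow requirement)
  mlflow.set_tag('model_type', 'decision-tree')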
  
  
# Launch the MLflow web UI, accessible at http://localhost:5000 by default
# !mlflow ui
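# (the UI reads run data from the local ./mlruns directory by default;
#  a different port can be chosen with e.g. `mlflow ui --port 5001`)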

# Retrieve experiment/run results programmatically and compare them
client = mlflow.tracking.MlflowClient()

# Retrieve Experiment information
EXPERIMENT_ID = client.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

# Retrieve Runs information (parameter 'depth', metrics 'accuracy', 'balanced_acc', 'f1')
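# Note: list_run_infos was removed in MLflow 2.x; there, client.search_runs([EXPERIMENT_ID])
# returns Run objects and the run ids come from run.info.run_id instead.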
ALL_RUNS_INFO = client.list_run_infos(EXPERIMENT_ID)
ALL_RUNS_ID = [run.run_id for run in ALL_RUNS_INFO]
ALL_PARAM = [client.get_run(run_id).data.params['depth'] for run_id in ALL_RUNS_ID]
ALL_METRIC = [client.get_run(run_id).data.metrics['accuracy'] for run_id in ALL_RUNS_ID]

# View Runs information
df = pd.DataFrame({'run_id': ALL_RUNS_ID, 'params': ALL_PARAM, 'metrics': ALL_METRIC})
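# (optional) print the comparison table, highest accuracy first
print(df.sort_values('metrics', ascending=False))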

# Retrieve Artifact from best run
best_run_id = df.sort_values('metrics', ascending=False).iloc[0]['run_id']
best_model_path = client.download_artifacts(best_run_id, 'dt-classifier')
best_model = mlflow.sklearn.load_model(best_model_path)
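
# Sanity-check the reloaded model on the held-out test set
# (reuses X_test / y_test defined above)
best_pred = best_model.predict(X_test)
print('best run test accuracy:', accuracy_score(y_test, best_pred))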