Python

Pandas Multi-Index

# Build a pandas MultiIndex from two existing DataFrame columns.
import pandas as pd

# Load the CSV and promote the instance/timepoint columns to a two-level index.
# set_index returns a new DataFrame (it is not in-place by default).
df = pd.read_csv("./data/dataset.csv").set_index(["INSTANCES", "TIMEPOINTS"])

Read tempfile

# Load in-memory file content with sktime by staging it in a named temporary file.
import os
import tempfile

from sktime.datasets import load_from_tsfile_to_dataframe

# `content` is provided by the surrounding code. It must be a bytes-like
# object because NamedTemporaryFile opens the file in binary mode by default.
df_tmp = None

# delete=False keeps the file on disk after it is closed, so it can be
# reopened by name; we are then responsible for removing it ourselves.
tmp_file = tempfile.NamedTemporaryFile(delete=False)
try:
    # Write and close the handle first: closing flushes the data to disk and
    # avoids reopening a still-open file by name (not portable everywhere).
    with tmp_file:
        tmp_file.write(content)
    print(tmp_file.name)
    df_tmp = load_from_tsfile_to_dataframe(
        tmp_file.name, return_separate_X_and_y=False
    )
finally:
    # Always remove the temporary file, even if writing or loading failed.
    os.unlink(tmp_file.name)

MLflow Quickstart

# MLflow quickstart: train a model, track it in an experiment, then fetch the best run.
import mlflow
import pandas as pd
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score
from sklearn.tree import DecisionTreeClassifier

EXPERIMENT_NAME = 'mlflow-experiment'
# NOTE: create_experiment raises if an experiment with this name already exists.
EXPERIMENT_ID = mlflow.create_experiment(EXPERIMENT_NAME)

# Placeholders: supply your own train/test split and tree depth here.
X_train, y_train, X_test, y_test = ...
depth = ...

model = DecisionTreeClassifier(max_depth=depth)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Every metric compares the held-out labels (y_test) with the predictions.
acc = accuracy_score(y_test, y_pred)
balanced_acc = balanced_accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

RUN_NAME = 'run-1'
with mlflow.start_run(experiment_id=EXPERIMENT_ID, run_name=RUN_NAME) as run:
    # Track parameters
    mlflow.log_param('depth', depth)

    # Track metrics
    mlflow.log_metric('accuracy', acc)
    mlflow.log_metrics({'balanced_acc': balanced_acc, 'f1': f1})

    # Track model
    mlflow.sklearn.log_model(model, 'dt-classifier')

# Launch the MLflow Web UI, accessible at http://localhost:5000
# !mlflow ui

# Retrieve experiment/run results programmatically and compare them
client = mlflow.tracking.MlflowClient()

# Retrieve Experiment information
EXPERIMENT_ID = client.get_experiment_by_name(EXPERIMENT_NAME).experiment_id

# Retrieve Runs information (parameter 'depth', metric 'accuracy').
# search_runs returns full Run objects (info + data) in one call; it replaces
# list_run_infos, which is no longer available in MLflow 2.x.
ALL_RUNS = client.search_runs(experiment_ids=[EXPERIMENT_ID])
ALL_RUNS_INFO = [r.info for r in ALL_RUNS]
ALL_RUNS_ID = [info.run_id for info in ALL_RUNS_INFO]
ALL_PARAM = [r.data.params['depth'] for r in ALL_RUNS]
ALL_METRIC = [r.data.metrics['accuracy'] for r in ALL_RUNS]

# View Runs information
df = pd.DataFrame({'run_id': ALL_RUNS_ID, 'params': ALL_PARAM, 'metrics': ALL_METRIC})

# Retrieve Artifact from best run (highest accuracy first)
best_run_id = df.sort_values('metrics', ascending=False).iloc[0]['run_id']
best_model_path = client.download_artifacts(best_run_id, 'dt-classifier')
best_model = mlflow.sklearn.load_model(best_model_path)

Numpy3d array to Sktime panel data

# Convert numpy3D data to nested Panel data for use with the sktime library.
import numpy as np
from sktime.datatypes import convert
from sktime.datatypes._panel._convert import (
    from_3d_numpy_to_nested,
    from_multi_index_to_nested,
)

# Import the numpy3D data.
# X_train has the shape (num_instances, num_features, num_timepoints).
X_train = np.load('./data/X_train.npy')

# Convert the numpy3D data to a Multi-Index pandas DataFrame
X_train = convert(X_train, from_type="numpy3D", to_type="pd-multiindex")

# Convert the Multi-Index pandas DataFrame to nested Panel data
X_train = from_multi_index_to_nested(X_train)

# Shorter solution: convert the numpy3D array to the nested Panel format directly.
# The array is reloaded because X_train was overwritten by the steps above.
X_train = from_3d_numpy_to_nested(
    np.load('./data/X_train.npy')
)

# Numpy3D Array: ...

Add Python Venv to Jupyter Notebook

How to add a Python virtual environment to Jupyter Notebook: (1) Activate the virtual environment, e.g. myvenv. (2) Install ipykernel, which provides the IPython kernel for Jupyter (pip install ipykernel). (3) Add the virtual environment, myvenv, to Jupyter by typing: python -m ipykernel install --user --name=myvenv