Skip to content

Instantly share code, notes, and snippets.

@ldrewniak
Created November 5, 2019 14:01
Show Gist options
  • Save ldrewniak/b0c480205b95d7f1b6f8fb1e1b534ee5 to your computer and use it in GitHub Desktop.
Save ldrewniak/b0c480205b95d7f1b6f8fb1e1b534ee5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import mlflow\n",
"import os\n",
"import uuid \n",
"import shutil\n",
"\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Activation\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import precision_recall_fscore_support as score\n",
"\n",
"from mlflow.keras import save_model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load data\n",
"train_labels, train_values = [pd.read_csv(f'../data/{f}.csv') for f in ['train_labels', 'train_values']]\n",
"tt_data = train_values.join(train_labels.set_index('building_id'), on='building_id')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# drop unnecessary columns from train data\n",
"tt_x = tt_data.drop(columns=['building_id','damage_grade']).copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# label encode all columns with stings\n",
"for col in tt_x.columns:\n",
" if tt_x[col].dtype == 'object':\n",
" tt_x[col] = LabelEncoder().fit_transform(tt_x[col])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def one_hot_encode(data):\n",
" examples = len(data)\n",
" set_len = len(set(data))\n",
" arr = np.zeros((examples, set_len))\n",
" arr[np.arange(examples), data.apply(lambda x: x-1)] = 1\n",
" return arr\n",
"\n",
"# one-hot encode column to predict\n",
"tt_y = one_hot_encode(tt_data.damage_grade)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# split into test/train in 0.25 ratio\n",
"train_x, test_x, train_y, test_y = train_test_split(tt_x, tt_y, test_size=0.25, random_state=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def run_learning(layer_input, layer_hidden, layer_output, batch_size, nb_epoch, loss, optimizer, metrics):\n",
" # save passed params\n",
" params = locals()\n",
" \n",
" # set path to main directory instead run direcotry\n",
" absolute_path = os.path.abspath(\"../\")\n",
" mlflow.set_tracking_uri(f'file:{absolute_path}/mlruns')\n",
" mlflow.set_experiment('modeling-earthquake-damage')\n",
" \n",
" with mlflow.start_run(run_name='sample_run') as run:\n",
" # log parameters\n",
" for param in params:\n",
" mlflow.log_param(param, params[param])\n",
" \n",
" #create temp folder\n",
" experiment_temp_path = f'.tmp/{uuid.uuid1().hex}'\n",
" \n",
" # create model\n",
" model = Sequential()\n",
" model.add(Dense(layer_input[0], input_dim=layer_input[1], activation=layer_input[2]))\n",
" for neurons, activation in layer_hidden:\n",
" model.add(Dense(neurons, activation=activation))\n",
" model.add(Dense(layer_output[0], activation=layer_output[1]))\n",
" model.compile(loss=loss, optimizer=optimizer, metrics=metrics)\n",
"\n",
" # fit model\n",
" fit_result = model.fit(train_x, train_y,\n",
" batch_size=batch_size,\n",
" epochs=nb_epoch,\n",
" validation_data=(test_x, test_y),\n",
" shuffle=True)\n",
" # predict\n",
" predictions = model.predict(test_x)\n",
"\n",
" precision, recall, fscore, support = score(np.argmax(test_y, axis=1), np.argmax(predictions, axis=1), average='micro')\n",
" \n",
" # save results\n",
" for key in fit_result.history.keys():\n",
" for epoch,value in enumerate(fit_result.history[key], 1):\n",
" mlflow.log_metric(key, value, epoch)\n",
" \n",
" mlflow.log_metric(\"micro_precision\", precision)\n",
" mlflow.log_metric(\"micro_recall\", recall)\n",
" mlflow.log_metric(\"micro_fscore\", fscore)\n",
" \n",
" # save model\n",
" save_model(model, f'{experiment_temp_path}/model')\n",
" shutil.make_archive(f'{experiment_temp_path}/model', 'zip', f'{experiment_temp_path}/model')\n",
" \n",
" # move model to artifact\n",
" mlflow.log_artifact(f'{experiment_temp_path}/model.zip')\n",
" \n",
" #remove temp files as they were already moved\n",
" shutil.rmtree('.tmp')\n",
" \n",
"# some parameters \n",
"params = {\n",
" 'layer_input': (76, 38, 'sigmoid'),\n",
" 'layer_hidden': [(76, 'sigmoid')],\n",
" 'layer_output': (3, 'softmax'),\n",
" 'batch_size': 100,\n",
" 'nb_epoch': 10,\n",
" 'loss': 'categorical_crossentropy',\n",
" 'optimizer': 'adamax', \n",
" 'metrics': ['accuracy']\n",
"}\n",
"# run the code\n",
"run_learning(**params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"params = {\n",
" 'layer_input': (76, 38, 'sigmoid'),\n",
" 'layer_hidden': [(76, 'sigmoid')],\n",
" 'layer_output': (3, 'softmax'),\n",
" 'batch_size': 1000,\n",
" 'nb_epoch': 10,\n",
" 'loss': 'categorical_crossentropy',\n",
" 'optimizer': 'adamax', \n",
" 'metrics': ['accuracy']\n",
"}\n",
"run_learning(**params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"params = {\n",
" 'layer_input': (152, 38, 'sigmoid'),\n",
" 'layer_hidden': [(152, 'sigmoid')],\n",
" 'layer_output': (3, 'softmax'),\n",
" 'batch_size': 100,\n",
" 'nb_epoch': 10,\n",
" 'loss': 'categorical_crossentropy',\n",
" 'optimizer': 'adamax', \n",
" 'metrics': ['accuracy']\n",
"}\n",
"run_learning(**params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"params = {\n",
" 'layer_input': (152, 38, 'sigmoid'),\n",
" 'layer_hidden': [(152, 'sigmoid')],\n",
" 'layer_output': (3, 'softmax'),\n",
" 'batch_size': 100,\n",
" 'nb_epoch': 10,\n",
" 'loss': 'categorical_crossentropy',\n",
" 'optimizer': 'adamax', \n",
" 'metrics': ['accuracy']\n",
"}\n",
"run_learning(**params)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment