Skip to content

Instantly share code, notes, and snippets.

@cosmincatalin
Created December 6, 2018 12:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cosmincatalin/75da31fdc3007ee1e7f8424c5512adcf to your computer and use it in GitHub Desktop.
Save cosmincatalin/75da31fdc3007ee1e7f8424c5512adcf to your computer and use it in GitHub Desktop.
Modeling of an MXNet regression problem
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import urllib.request\n",
"\n",
"urllib.request \\\n",
" .urlretrieve(\"https://raw.githubusercontent.com/cosmincatalin/mxnet-onnx-mlnet/master/data/test.csv\", \"test.csv\");\n",
"\n",
"urllib.request \\\n",
" .urlretrieve(\"https://raw.githubusercontent.com/cosmincatalin/mxnet-onnx-mlnet/master/data/train.csv\", \"train.csv\");"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df_train = pd.read_csv(\"train.csv\")\n",
"df_test = pd.read_csv(\"test.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import mxnet as mx\n",
"\n",
"train_X = mx.nd.array(df_train.drop([\"vendor_id\", \"payment_type\", \"fare_amount\"], axis=1).values)\n",
"train_y = mx.nd.array(df_train.fare_amount.values)\n",
"test_X = mx.nd.array(df_test.drop([\"vendor_id\", \"payment_type\", \"fare_amount\"], axis=1).values)\n",
"test_y = mx.nd.array(df_test.fare_amount.values)\n",
"train_nd = list(zip(train_X, train_y))\n",
"test_nd = list(zip(test_X, test_y))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from os import makedirs\n",
"from tempfile import gettempdir\n",
"from pickle import dump\n",
"\n",
"def save_to_disk(data, type):\n",
" makedirs(\"{}/pvdwgmas/data/pickles/{}\".format(gettempdir(), type))\n",
" with open(\"{}/pvdwgmas/data/pickles/{}/data.p\".format(gettempdir(), type), \"wb\") as out:\n",
" dump(data, out)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"save_to_disk(train_nd, \"train\")\n",
"save_to_disk(test_nd, \"test\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import sagemaker\n",
"\n",
"sagemaker_session = sagemaker.Session()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"inputs = sagemaker_session.upload_data(path=\"{}/pvdwgmas/data/pickles\".format(gettempdir()),\n",
" bucket=\"redacted\", key_prefix=\"cosmin/sagemaker/demo\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from sagemaker.mxnet import MXNet\n",
"\n",
"estimator = MXNet(\"mxnet-onnx-sagemaker-script.py\",\n",
" role=sagemaker.get_execution_role(),\n",
" train_instance_count=1,\n",
" train_instance_type=\"ml.p2.xlarge\",\n",
" py_version=\"py3\",\n",
" framework_version=\"1.3.0\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sagemaker:Created S3 bucket: redacted\n",
"INFO:sagemaker:Creating training-job with name: sagemaker-mxnet-2018-12-04-12-19-28-732\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-12-04 12:19:29 Starting - Starting the training job...\n",
"2018-12-04 12:19:30 Starting - Launching requested ML instances......\n",
"2018-12-04 12:20:30 Starting - Preparing the instances for training......\n",
"2018-12-04 12:21:52 Downloading - Downloading input data...\n",
"2018-12-04 12:22:22 Training - Downloading the training image..\n",
"\u001b[31m2018-12-04 12:22:36,529 sagemaker-containers INFO Imported framework sagemaker_mxnet_container.training\u001b[0m\n",
"\u001b[31m2018-12-04 12:22:36,555 sagemaker_mxnet_container.training INFO MXNet training environment: {'SM_CHANNELS': '[\"training\"]', 'SM_HPS': '{}', 'SM_OUTPUT_INTERMEDIATE_DIR': '/opt/ml/output/intermediate', 'SM_TRAINING_ENV': '{\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1\",\"framework_module\":\"sagemaker_mxnet_container.training:main\",\"hosts\":[\"algo-1\"],\"hyperparameters\":{},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"job_name\":\"sagemaker-mxnet-2018-12-04-12-19-28-732\",\"log_level\":20,\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://redacted/sagemaker-mxnet-2018-12-04-12-19-28-732/source/sourcedir.tar.gz\",\"module_name\":\"mxnet-onnx-sagemaker-script\",\"network_interface_name\":\"ethwe\",\"num_cpus\":4,\"num_gpus\":1,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"ethwe\"},\"user_entry_point\":\"mxnet-onnx-sagemaker-script.py\"}', 'SM_OUTPUT_DATA_DIR': '/opt/ml/output/data', 'SM_INPUT_DIR': '/opt/ml/input', 'SM_CHANNEL_TRAINING': '/opt/ml/input/data/training', 'SM_FRAMEWORK_MODULE': 'sagemaker_mxnet_container.training:main', 'SM_OUTPUT_DIR': '/opt/ml/output', 'SM_NUM_GPUS': '1', 'SM_MODULE_NAME': 'mxnet-onnx-sagemaker-script', 'SM_USER_ENTRY_POINT': 'mxnet-onnx-sagemaker-script.py', 'SM_RESOURCE_CONFIG': '{\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"ethwe\"}', 'SM_FRAMEWORK_PARAMS': '{}', 'SM_LOG_LEVEL': '20', 'SM_MODULE_DIR': 's3://redacted/sagemaker-mxnet-2018-12-04-12-19-28-732/source/sourcedir.tar.gz', 'SM_INPUT_CONFIG_DIR': '/opt/ml/input/config', 'SM_CURRENT_HOST': 'algo-1', 'SM_USER_ARGS': '[]', 'SM_INPUT_DATA_CONFIG': '{\"training\u001b[0m\n",
"\u001b[31m2018-12-04 12:22:36,831 sagemaker-containers INFO Module mxnet-onnx-sagemaker-script does not provide a setup.py. \u001b[0m\n",
"\u001b[31mGenerating setup.py\u001b[0m\n",
"\u001b[31m2018-12-04 12:22:36,832 sagemaker-containers INFO Generating setup.cfg\u001b[0m\n",
"\u001b[31m2018-12-04 12:22:36,832 sagemaker-containers INFO Generating MANIFEST.in\u001b[0m\n",
"\u001b[31m2018-12-04 12:22:36,832 sagemaker-containers INFO Installing module with the following command:\u001b[0m\n",
"\u001b[31m/usr/bin/python -m pip install -U . \u001b[0m\n",
"\u001b[31mProcessing /opt/ml/code\u001b[0m\n",
"\u001b[31mBuilding wheels for collected packages: mxnet-onnx-sagemaker-script\n",
" Running setup.py bdist_wheel for mxnet-onnx-sagemaker-script: started\u001b[0m\n",
"\u001b[31m Running setup.py bdist_wheel for mxnet-onnx-sagemaker-script: finished with status 'done'\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-epz150ar/wheels/35/24/16/37574d11bf9bde50616c67372a334f94fa8356bc7164af8ca3\u001b[0m\n",
"\u001b[31mSuccessfully built mxnet-onnx-sagemaker-script\u001b[0m\n",
"\u001b[31mInstalling collected packages: mxnet-onnx-sagemaker-script\u001b[0m\n",
"\u001b[31mSuccessfully installed mxnet-onnx-sagemaker-script-1.0.0\u001b[0m\n",
"\u001b[31m2018-12-04 12:22:38,628 sagemaker-containers INFO Invoking user script\n",
"\u001b[0m\n",
"\u001b[31mTraining Env:\n",
"\u001b[0m\n",
"\u001b[31m{\n",
" \"num_cpus\": 4,\n",
" \"input_dir\": \"/opt/ml/input\",\n",
" \"module_dir\": \"s3://redacted/sagemaker-mxnet-2018-12-04-12-19-28-732/source/sourcedir.tar.gz\",\n",
" \"module_name\": \"mxnet-onnx-sagemaker-script\",\n",
" \"input_data_config\": {\n",
" \"training\": {\n",
" \"S3DistributionType\": \"FullyReplicated\",\n",
" \"TrainingInputMode\": \"File\",\n",
" \"RecordWrapperType\": \"None\"\n",
" }\n",
" },\n",
" \"network_interface_name\": \"ethwe\",\n",
" \"hyperparameters\": {},\n",
" \"current_host\": \"algo-1\",\n",
" \"input_config_dir\": \"/opt/ml/input/config\",\n",
" \"resource_config\": {\n",
" \"hosts\": [\n",
" \"algo-1\"\n",
" ],\n",
" \"network_interface_name\": \"ethwe\",\n",
" \"current_host\": \"algo-1\"\n",
" },\n",
" \"hosts\": [\n",
" \"algo-1\"\n",
" ],\n",
" \"output_data_dir\": \"/opt/ml/output/data\",\n",
" \"num_gpus\": 1,\n",
" \"channel_input_dirs\": {\n",
" \"training\": \"/opt/ml/input/data/training\"\n",
" },\n",
" \"model_dir\": \"/opt/ml/model\",\n",
" \"additional_framework_parameters\": {},\n",
" \"job_name\": \"sagemaker-mxnet-2018-12-04-12-19-28-732\",\n",
" \"user_entry_point\": \"mxnet-onnx-sagemaker-script.py\",\n",
" \"framework_module\": \"sagemaker_mxnet_container.training:main\",\n",
" \"output_intermediate_dir\": \"/opt/ml/output/intermediate\",\n",
" \"output_dir\": \"/opt/ml/output\",\n",
" \"log_level\": 20\u001b[0m\n",
"\u001b[31m}\n",
"\u001b[0m\n",
"\u001b[31mEnvironment variables:\n",
"\u001b[0m\n",
"\u001b[31mSM_FRAMEWORK_PARAMS={}\u001b[0m\n",
"\u001b[31mSM_CHANNELS=[\"training\"]\u001b[0m\n",
"\u001b[31mSM_TRAINING_ENV={\"additional_framework_parameters\":{},\"channel_input_dirs\":{\"training\":\"/opt/ml/input/data/training\"},\"current_host\":\"algo-1\",\"framework_module\":\"sagemaker_mxnet_container.training:main\",\"hosts\":[\"algo-1\"],\"hyperparameters\":{},\"input_config_dir\":\"/opt/ml/input/config\",\"input_data_config\":{\"training\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}},\"input_dir\":\"/opt/ml/input\",\"job_name\":\"sagemaker-mxnet-2018-12-04-12-19-28-732\",\"log_level\":20,\"model_dir\":\"/opt/ml/model\",\"module_dir\":\"s3://redacted/sagemaker-mxnet-2018-12-04-12-19-28-732/source/sourcedir.tar.gz\",\"module_name\":\"mxnet-onnx-sagemaker-script\",\"network_interface_name\":\"ethwe\",\"num_cpus\":4,\"num_gpus\":1,\"output_data_dir\":\"/opt/ml/output/data\",\"output_dir\":\"/opt/ml/output\",\"output_intermediate_dir\":\"/opt/ml/output/intermediate\",\"resource_config\":{\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"ethwe\"},\"user_entry_point\":\"mxnet-onnx-sagemaker-script.py\"}\u001b[0m\n",
"\u001b[31mSM_NETWORK_INTERFACE_NAME=ethwe\u001b[0m\n",
"\u001b[31mSM_HPS={}\u001b[0m\n",
"\u001b[31mSM_OUTPUT_DATA_DIR=/opt/ml/output/data\u001b[0m\n",
"\u001b[31mSM_INPUT_DIR=/opt/ml/input\u001b[0m\n",
"\u001b[31mSM_OUTPUT_DIR=/opt/ml/output\u001b[0m\n",
"\u001b[31mSM_FRAMEWORK_MODULE=sagemaker_mxnet_container.training:main\u001b[0m\n",
"\u001b[31mSM_HOSTS=[\"algo-1\"]\u001b[0m\n",
"\u001b[31mSM_NUM_GPUS=1\u001b[0m\n",
"\u001b[31mSM_MODULE_NAME=mxnet-onnx-sagemaker-script\u001b[0m\n",
"\u001b[31mSM_RESOURCE_CONFIG={\"current_host\":\"algo-1\",\"hosts\":[\"algo-1\"],\"network_interface_name\":\"ethwe\"}\u001b[0m\n",
"\u001b[31mSM_USER_ENTRY_POINT=mxnet-onnx-sagemaker-script.py\u001b[0m\n",
"\u001b[31mSM_LOG_LEVEL=20\u001b[0m\n",
"\u001b[31mSM_MODULE_DIR=s3://redacted/sagemaker-mxnet-2018-12-04-12-19-28-732/source/sourcedir.tar.gz\u001b[0m\n",
"\u001b[31mSM_INPUT_CONFIG_DIR=/opt/ml/input/config\u001b[0m\n",
"\u001b[31mSM_CURRENT_HOST=algo-1\u001b[0m\n",
"\u001b[31mSM_USER_ARGS=[]\u001b[0m\n",
"\u001b[31mSM_INPUT_DATA_CONFIG={\"training\":{\"RecordWrapperType\":\"None\",\"S3DistributionType\":\"FullyReplicated\",\"TrainingInputMode\":\"File\"}}\u001b[0m\n",
"\u001b[31mSM_OUTPUT_INTERMEDIATE_DIR=/opt/ml/output/intermediate\u001b[0m\n",
"\u001b[31mSM_NUM_CPUS=4\u001b[0m\n",
"\u001b[31mSM_CHANNEL_TRAINING=/opt/ml/input/data/training\u001b[0m\n",
"\u001b[31mPYTHONPATH=/usr/local/bin:/usr/lib/python35.zip:/usr/lib/python3.5:/usr/lib/python3.5/plat-x86_64-linux-gnu:/usr/lib/python3.5/lib-dynload:/usr/local/lib/python3.5/dist-packages:/usr/lib/python3/dist-packages\u001b[0m\n",
"\u001b[31mSM_MODEL_DIR=/opt/ml/model\n",
"\u001b[0m\n",
"\u001b[31mInvoking script with the following command:\n",
"\u001b[0m\n",
"\u001b[31m/usr/bin/python -m mxnet-onnx-sagemaker-script\n",
"\n",
"\u001b[0m\n",
"\n",
"2018-12-04 12:22:36 Training - Training image download completed. Training in progress.\u001b[31mEpoch 0. Loss: 10.111188573876328, Train_mae 1.2571593183347431, Test_mae 1.271174145998001\u001b[0m\n",
"\u001b[31mEpoch 1. Loss: 11.19286100042664, Train_mae 1.103177388820768, Test_mae 1.1245996921424866\u001b[0m\n",
"\u001b[31mEpoch 2. Loss: 8.147909843341747, Train_mae 0.8778304503157415, Test_mae 0.9007742985286713\u001b[0m\n",
"\u001b[31mEpoch 3. Loss: 11.027663687180395, Train_mae 1.4317829073152097, Test_mae 1.4511440588378905\u001b[0m\n",
"\u001b[31mEpoch 4. Loss: 7.021085305880813, Train_mae 1.379995596678782, Test_mae 1.4081653979415893\u001b[0m\n",
"\n",
"2018-12-04 12:30:32 Uploading - Uploading generated training model\u001b[31mINFO:root:Converting json and weight file to sym and params\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 0, op: null, name: data\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 1, op: null, name: hybridsequential0_dense0_weight\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 2, op: null, name: hybridsequential0_dense0_bias\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 3, op: FullyConnected, name: hybridsequential0_dense0_fwd\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 4, op: Dropout, name: hybridsequential0_dropout0_fwd\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 5, op: null, name: hybridsequential0_dense1_weight\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 6, op: null, name: hybridsequential0_dense1_bias\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 7, op: FullyConnected, name: hybridsequential0_dense1_fwd\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 8, op: Dropout, name: hybridsequential0_dropout1_fwd\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 9, op: null, name: hybridsequential0_dense2_weight\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 10, op: null, name: hybridsequential0_dense2_bias\u001b[0m\n",
"\u001b[31mINFO:root:Converting idx: 11, op: FullyConnected, name: hybridsequential0_dense2_fwd\u001b[0m\n",
"\u001b[31mINFO:root:Output node is: hybridsequential0_dense2_fwd\u001b[0m\n",
"\u001b[31mINFO:root:Input shape of the model [(1, 4)] \u001b[0m\n",
"\u001b[31mINFO:root:Exported ONNX file /opt/ml/model/model.onnx saved to disk\u001b[0m\n",
"\u001b[31m2018-12-04 12:30:29,335 sagemaker-containers INFO Reporting training SUCCESS\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"2018-12-04 12:30:37 Completed - Training job completed\n",
"Billable seconds: 525\n"
]
}
],
"source": [
"estimator.fit(inputs)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'s3://redacted/sagemaker-mxnet-2018-12-04-12-19-28-732/output/model.tar.gz'"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"estimator.model_data"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_mxnet_p36",
"language": "python",
"name": "conda_mxnet_p36"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment