Last active
January 15, 2022 22:10
-
-
Save petermchale/a222d75bb242c8082e38f7008febc1b4 to your computer and use it in GitHub Desktop.
Build, train, and deploy a machine-learning model on AWS SageMaker
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "e6d1eab7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# https://sagemaker.readthedocs.io/en/stable/index.html" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "e06624fb", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Success - the MySageMakerInstance is in the us-west-2 region. You will use the 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest container for your SageMaker endpoint.\n" | |
] | |
} | |
], | |
"source": [ | |
"# import libraries\n", | |
"import boto3, re, sys, math, json, os, sagemaker, urllib.request\n", | |
"from sagemaker import get_execution_role\n", | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"import matplotlib.pyplot as plt\n", | |
"from IPython.display import Image\n", | |
"from IPython.display import display\n", | |
"from time import gmtime, strftime\n", | |
"from sagemaker.predictor import csv_serializer\n", | |
"\n", | |
"# Define IAM role\n", | |
"role = get_execution_role()\n", | |
"prefix = 'sagemaker/DEMO-xgboost-dm'\n", | |
"my_region = boto3.session.Session().region_name # set the region of the instance\n", | |
"\n", | |
"# this line automatically looks for the XGBoost image URI and builds an XGBoost container.\n", | |
"xgboost_container = sagemaker.image_uris.retrieve(\"xgboost\", my_region, \"latest\")\n", | |
"\n", | |
"print(\"Success - the MySageMakerInstance is in the \" + my_region + \" region. You will use the \" + xgboost_container + \" container for your SageMaker endpoint.\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "0e0a86df", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"S3 bucket created successfully\n" | |
] | |
} | |
], | |
"source": [ | |
"bucket_name = 'build-train-deploy-machine-learning-model-sagemaker-ptm' \n", | |
"s3 = boto3.resource('s3')\n", | |
"try:\n", | |
" if my_region == 'us-east-1':\n", | |
" s3.create_bucket(Bucket=bucket_name)\n", | |
" else: \n", | |
" s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={ 'LocationConstraint': my_region })\n", | |
" print('S3 bucket created successfully')\n", | |
"except Exception as e:\n", | |
" print('S3 error: ',e)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "92208fd0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Success: downloaded bank_clean.csv.\n", | |
"Success: Data loaded into dataframe.\n" | |
] | |
} | |
], | |
"source": [ | |
"try:\n", | |
" urllib.request.urlretrieve (\"https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv\", \"bank_clean.csv\")\n", | |
" print('Success: downloaded bank_clean.csv.')\n", | |
"except Exception as e:\n", | |
" print('Data load error: ',e)\n", | |
"\n", | |
"try:\n", | |
" model_data = pd.read_csv('./bank_clean.csv',index_col=0)\n", | |
" print('Success: Data loaded into dataframe.')\n", | |
"except Exception as e:\n", | |
" print('Data load error: ',e)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "5934dcc2", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(28831, 61) (12357, 61)\n" | |
] | |
} | |
], | |
"source": [ | |
"train_data, test_data = np.split(model_data.sample(frac=1, random_state=1729), [int(0.7 * len(model_data))])\n", | |
"print(train_data.shape, test_data.shape)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "ad78715d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.concat([\n", | |
" train_data['y_yes'], \n", | |
" train_data.drop(['y_no', 'y_yes'], axis=1)\n", | |
"], axis=1).to_csv('train.csv', index=False, header=False)\n", | |
"\n", | |
"boto3.Session().resource('s3').Bucket(bucket_name).Object(\n", | |
" os.path.join(prefix, 'train/train.csv')\n", | |
").upload_file('train.csv')\n", | |
"\n", | |
"s3_input_train = sagemaker.inputs.TrainingInput(\n", | |
" s3_data='s3://{}/{}/train'.format(bucket_name, prefix), \n", | |
" content_type='csv'\n", | |
")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "1f0f9555", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"sess = sagemaker.Session()\n", | |
"xgb = sagemaker.estimator.Estimator(\n", | |
" xgboost_container,\n", | |
" role, \n", | |
" instance_count=1, \n", | |
" instance_type='ml.m4.xlarge',\n", | |
" output_path='s3://{}/{}/output'.format(bucket_name, prefix),\n", | |
" sagemaker_session=sess\n", | |
")\n", | |
"xgb.set_hyperparameters(\n", | |
" max_depth=5,\n", | |
" eta=0.2,\n", | |
" gamma=4,\n", | |
" min_child_weight=6,\n", | |
" subsample=0.8,\n", | |
" silent=0,\n", | |
" objective='binary:logistic',\n", | |
" num_round=100\n", | |
")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "c306ad50", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"2022-01-15 18:10:26 Starting - Starting the training job...\n", | |
"2022-01-15 18:10:49 Starting - Launching requested ML instancesProfilerReport-1642270226: InProgress\n", | |
"......\n", | |
"2022-01-15 18:11:49 Starting - Preparing the instances for training.........\n", | |
"2022-01-15 18:13:24 Downloading - Downloading input data\n", | |
"2022-01-15 18:13:24 Training - Downloading the training image...\n", | |
"2022-01-15 18:13:50 Uploading - Uploading generated training model\u001b[34mArguments: train\u001b[0m\n", | |
"\u001b[34m[2022-01-15:18:13:45:INFO] Running standalone xgboost training.\u001b[0m\n", | |
"\u001b[34m[2022-01-15:18:13:45:INFO] Path /opt/ml/input/data/validation does not exist!\u001b[0m\n", | |
"\u001b[34m[2022-01-15:18:13:45:INFO] File size need to be processed in the node: 3.38mb. Available memory size in the node: 8345.09mb\u001b[0m\n", | |
"\u001b[34m[2022-01-15:18:13:45:INFO] Determined delimiter of CSV input is ','\u001b[0m\n", | |
"\u001b[34m[18:13:45] S3DistributionType set as FullyReplicated\u001b[0m\n", | |
"\u001b[34m[18:13:46] 28831x59 matrix with 1701029 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[0]#011train-error:0.100482\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[1]#011train-error:0.099858\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[2]#011train-error:0.099754\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[3]#011train-error:0.099095\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[4]#011train-error:0.098991\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[5]#011train-error:0.099303\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[6]#011train-error:0.099684\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[7]#011train-error:0.09906\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[8]#011train-error:0.098852\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[9]#011train-error:0.098679\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[10]#011train-error:0.098748\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[11]#011train-error:0.098748\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[12]#011train-error:0.098748\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[13]#011train-error:0.09854\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[14]#011train-error:0.098574\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[15]#011train-error:0.098609\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[16]#011train-error:0.098817\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[17]#011train-error:0.098817\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[18]#011train-error:0.098679\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[19]#011train-error:0.098679\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[20]#011train-error:0.098713\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[21]#011train-error:0.098505\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[22]#011train-error:0.098401\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[23]#011train-error:0.098332\u001b[0m\n", | |
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[24]#011train-error:0.098332\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[25]#011train-error:0.09795\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[26]#011train-error:0.098262\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[27]#011train-error:0.098193\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 24 pruned nodes, max_depth=3\u001b[0m\n", | |
"\u001b[34m[28]#011train-error:0.097985\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[29]#011train-error:0.097499\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[30]#011train-error:0.097638\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[31]#011train-error:0.097395\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[32]#011train-error:0.097222\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[33]#011train-error:0.097118\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[34]#011train-error:0.097014\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[35]#011train-error:0.09684\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[36]#011train-error:0.096667\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[37]#011train-error:0.096736\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[38]#011train-error:0.096563\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[39]#011train-error:0.096355\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 36 pruned nodes, max_depth=3\u001b[0m\n", | |
"\u001b[34m[40]#011train-error:0.096285\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 38 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[41]#011train-error:0.096528\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 16 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[42]#011train-error:0.096355\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 26 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[43]#011train-error:0.096459\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 36 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[44]#011train-error:0.096355\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 34 pruned nodes, max_depth=2\u001b[0m\n", | |
"\u001b[34m[45]#011train-error:0.096216\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[46]#011train-error:0.096077\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[47]#011train-error:0.0958\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[48]#011train-error:0.095904\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[49]#011train-error:0.095904\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 34 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[50]#011train-error:0.095834\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[51]#011train-error:0.095765\u001b[0m\n", | |
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[52]#011train-error:0.095904\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[53]#011train-error:0.095834\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[54]#011train-error:0.095834\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[55]#011train-error:0.09573\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[56]#011train-error:0.095626\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[57]#011train-error:0.095696\u001b[0m\n", | |
"\u001b[34m[58]#011train-error:0.095661\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 24 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[59]#011train-error:0.095592\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[60]#011train-error:0.095522\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 16 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[61]#011train-error:0.095383\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[62]#011train-error:0.095314\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 26 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[63]#011train-error:0.095661\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 32 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[64]#011train-error:0.095661\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[65]#011train-error:0.095418\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 36 pruned nodes, max_depth=3\u001b[0m\n", | |
"\u001b[34m[66]#011train-error:0.095314\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[67]#011train-error:0.095349\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 28 pruned nodes, max_depth=3\u001b[0m\n", | |
"\u001b[34m[68]#011train-error:0.095314\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 28 pruned nodes, max_depth=0\u001b[0m\n", | |
"\u001b[34m[69]#011train-error:0.095314\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 32 pruned nodes, max_depth=3\u001b[0m\n", | |
"\u001b[34m[70]#011train-error:0.095383\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[71]#011train-error:0.095453\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[72]#011train-error:0.095349\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 24 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[73]#011train-error:0.095245\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[74]#011train-error:0.095175\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 18 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[75]#011train-error:0.095071\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[76]#011train-error:0.095175\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[77]#011train-error:0.095002\u001b[0m\n", | |
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[78]#011train-error:0.095037\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 32 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[79]#011train-error:0.095037\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[80]#011train-error:0.095002\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[81]#011train-error:0.094794\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[82]#011train-error:0.094759\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[83]#011train-error:0.094933\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[84]#011train-error:0.09469\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[85]#011train-error:0.094759\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[86]#011train-error:0.094482\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[87]#011train-error:0.094447\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 20 pruned nodes, max_depth=0\u001b[0m\n", | |
"\u001b[34m[88]#011train-error:0.094482\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[89]#011train-error:0.094378\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 28 pruned nodes, max_depth=0\u001b[0m\n", | |
"\u001b[34m[90]#011train-error:0.094343\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 28 pruned nodes, max_depth=4\u001b[0m\n", | |
"\u001b[34m[91]#011train-error:0.094274\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[92]#011train-error:0.094239\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 32 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[93]#011train-error:0.094169\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[94]#011train-error:0.094169\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[95]#011train-error:0.094204\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 28 pruned nodes, max_depth=0\u001b[0m\n", | |
"\u001b[34m[96]#011train-error:0.094204\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n", | |
"\u001b[34m[97]#011train-error:0.093927\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 38 pruned nodes, max_depth=0\u001b[0m\n", | |
"\u001b[34m[98]#011train-error:0.093927\u001b[0m\n", | |
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 32 pruned nodes, max_depth=0\u001b[0m\n", | |
"\u001b[34m[99]#011train-error:0.093892\u001b[0m\n", | |
"\n", | |
"2022-01-15 18:14:10 Completed - Training job completed\n", | |
"Training seconds: 50\n", | |
"Billable seconds: 50\n" | |
] | |
} | |
], | |
"source": [ | |
"xgb.fit({'train': s3_input_train})\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "47e81895", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"------!" | |
] | |
} | |
], | |
"source": [ | |
"# This code deploys the model on a server and creates a SageMaker endpoint that you can access:\n", | |
"xgb_predictor = xgb.deploy(initial_instance_count=1,instance_type='ml.m4.xlarge')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "61ab1a1f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(12357,)\n" | |
] | |
} | |
], | |
"source": [ | |
"from sagemaker.serializers import CSVSerializer\n", | |
"\n", | |
"#load the data into an array:\n", | |
"test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).values \n", | |
"# set the serializer type:\n", | |
"xgb_predictor.serializer = CSVSerializer() \n", | |
"# predict!\n", | |
"predictions = xgb_predictor.predict(test_data_array).decode('utf-8') \n", | |
"# and turn the prediction into an array\n", | |
"predictions_array = np.fromstring(predictions[1:], sep=',') \n", | |
"print(predictions_array.shape)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "7849d565", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"Overall Classification Rate: 89.5%\n", | |
"\n", | |
"Predicted No Purchase Purchase\n", | |
"Observed\n", | |
"No Purchase 90% (10769) 37% (167)\n", | |
"Purchase 10% (1133) 63% (288) \n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"cm = pd.crosstab(\n", | |
" index=test_data['y_yes'], \n", | |
" columns=np.round(predictions_array), \n", | |
" rownames=['Observed'], \n", | |
" colnames=['Predicted']\n", | |
")\n", | |
"tn = cm.iloc[0,0]\n", | |
"fn = cm.iloc[1,0]\n", | |
"tp = cm.iloc[1,1]\n", | |
"fp = cm.iloc[0,1]\n", | |
"p = (tp+tn)/(tp+tn+fp+fn)*100\n", | |
"print(\"\\n{0:<20}{1:<4.1f}%\\n\".format(\"Overall Classification Rate: \", p))\n", | |
"print(\"{0:<15}{1:<15}{2:>8}\".format(\"Predicted\", \"No Purchase\", \"Purchase\"))\n", | |
"print(\"Observed\")\n", | |
"print(\"{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})\".format(\"No Purchase\", tn/(tn+fn)*100,tn, fp/(tp+fp)*100, fp))\n", | |
"print(\"{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \\n\".format(\"Purchase\", fn/(tn+fn)*100,fn, tp/(tp+fp)*100, tp))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "07a1dea3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"xgb_predictor.delete_endpoint(delete_endpoint_config=True)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "4fa843d7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'ResponseMetadata': {'RequestId': 'JZWMR507YKVX53WJ',\n", | |
" 'HostId': '0dgkgF6qUlnrjiIF7Ui5uFuSp5oG4a6WZvir6Jny/fyuw9yEuOPp4FpbeLB04VXAII8CF33YxJk=',\n", | |
" 'HTTPStatusCode': 200,\n", | |
" 'HTTPHeaders': {'x-amz-id-2': '0dgkgF6qUlnrjiIF7Ui5uFuSp5oG4a6WZvir6Jny/fyuw9yEuOPp4FpbeLB04VXAII8CF33YxJk=',\n", | |
" 'x-amz-request-id': 'JZWMR507YKVX53WJ',\n", | |
" 'date': 'Sat, 15 Jan 2022 18:35:01 GMT',\n", | |
" 'content-type': 'application/xml',\n", | |
" 'transfer-encoding': 'chunked',\n", | |
" 'server': 'AmazonS3',\n", | |
" 'connection': 'close'},\n", | |
" 'RetryAttempts': 0},\n", | |
" 'Deleted': [{'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/profiler-output/system/training_job_end.ts'},\n", | |
" {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/profiler-output/system/incremental/2022011518/1642270380.algo-1.json'},\n", | |
" {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/output/model.tar.gz'},\n", | |
" {'Key': 'sagemaker/DEMO-xgboost-dm/train/train.csv'},\n", | |
" {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/profiler-output/framework/training_job_end.ts'}]}]" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)\n", | |
"bucket_to_delete.objects.all().delete()\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "c28d64b4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "conda_python3", | |
"language": "python", | |
"name": "conda_python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment