Skip to content

Instantly share code, notes, and snippets.

@petermchale
Last active January 15, 2022 22:10
Show Gist options
  • Save petermchale/a222d75bb242c8082e38f7008febc1b4 to your computer and use it in GitHub Desktop.
Save petermchale/a222d75bb242c8082e38f7008febc1b4 to your computer and use it in GitHub Desktop.
build, train, and deploy a machine-learning model on aws sagemaker
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e6d1eab7",
"metadata": {},
"outputs": [],
"source": [
"# https://sagemaker.readthedocs.io/en/stable/index.html"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e06624fb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Success - the MySageMakerInstance is in the us-west-2 region. You will use the 433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest container for your SageMaker endpoint.\n"
]
}
],
"source": [
"# import libraries\n",
"import boto3, re, sys, math, json, os, sagemaker, urllib.request\n",
"from sagemaker import get_execution_role\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from IPython.display import Image\n",
"from IPython.display import display\n",
"from time import gmtime, strftime\n",
"from sagemaker.predictor import csv_serializer\n",
"\n",
"# IAM execution role used for the SageMaker calls below.\n",
"role = get_execution_role()\n",
"# Key prefix shared by everything this notebook stores in S3.\n",
"prefix = 'sagemaker/DEMO-xgboost-dm'\n",
"# Region of the current boto3 session.\n",
"my_region = boto3.session.Session().region_name\n",
"\n",
"# Look up the image URI of the XGBoost container for that region.\n",
"xgboost_container = sagemaker.image_uris.retrieve(\n",
"    framework=\"xgboost\", region=my_region, version=\"latest\"\n",
")\n",
"\n",
"print(\n",
"    f\"Success - the MySageMakerInstance is in the {my_region} region. \"\n",
"    f\"You will use the {xgboost_container} container for your SageMaker endpoint.\"\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0e0a86df",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"S3 bucket created successfully\n"
]
}
],
"source": [
"bucket_name = 'build-train-deploy-machine-learning-model-sagemaker-ptm'\n",
"s3 = boto3.resource('s3')\n",
"try:\n",
"    # us-east-1 is created without a LocationConstraint; every other region names itself.\n",
"    if my_region == 'us-east-1':\n",
"        s3.create_bucket(Bucket=bucket_name)\n",
"    else:\n",
"        s3.create_bucket(\n",
"            Bucket=bucket_name,\n",
"            CreateBucketConfiguration={'LocationConstraint': my_region},\n",
"        )\n",
"    print('S3 bucket created successfully')\n",
"except Exception as e:\n",
"    # Report and continue so the cell can be re-run without stopping the notebook.\n",
"    print('S3 error: ', e)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "92208fd0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Success: downloaded bank_clean.csv.\n",
"Success: Data loaded into dataframe.\n"
]
}
],
"source": [
"# Step 1: fetch the cleaned bank-marketing CSV next to the notebook.\n",
"try:\n",
"    urllib.request.urlretrieve(\n",
"        \"https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv\",\n",
"        \"bank_clean.csv\",\n",
"    )\n",
"    print('Success: downloaded bank_clean.csv.')\n",
"except Exception as e:\n",
"    # Best effort: a copy from an earlier run may still be on disk.\n",
"    print('Data load error: ', e)\n",
"\n",
"# Step 2: load it, using the first CSV column as the index.\n",
"try:\n",
"    model_data = pd.read_csv('./bank_clean.csv', index_col=0)\n",
"    print('Success: Data loaded into dataframe.')\n",
"except Exception as e:\n",
"    print('Data load error: ', e)\n",
"    # Every later cell needs model_data: stop here rather than fail later with a\n",
"    # NameError or silently reuse a stale frame left over from a previous run.\n",
"    raise\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "5934dcc2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(28831, 61) (12357, 61)\n"
]
}
],
"source": [
"# Shuffle every row with a fixed seed, then cut 70% / 30% by position.\n",
"# Plain iloc slices replace np.split, which on a DataFrame goes through the\n",
"# deprecated DataFrame.swapaxes.\n",
"shuffled_data = model_data.sample(frac=1, random_state=1729)\n",
"train_size = int(0.7 * len(model_data))\n",
"train_data = shuffled_data.iloc[:train_size]\n",
"test_data = shuffled_data.iloc[train_size:]\n",
"print(train_data.shape, test_data.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ad78715d",
"metadata": {},
"outputs": [],
"source": [
"# Write the training rows with the label (y_yes) first and the features after it,\n",
"# without an index column or a header row.\n",
"pd.concat(\n",
"    [train_data['y_yes'], train_data.drop(columns=['y_no', 'y_yes'])],\n",
"    axis=1,\n",
").to_csv('train.csv', index=False, header=False)\n",
"\n",
"# Upload the file to <prefix>/train/train.csv in the bucket.\n",
"boto3.Session().resource('s3').Object(\n",
"    bucket_name, os.path.join(prefix, 'train/train.csv')\n",
").upload_file('train.csv')\n",
"\n",
"# Point the training job at the uploaded folder, declared as CSV.\n",
"s3_input_train = sagemaker.inputs.TrainingInput(\n",
"    s3_data=f's3://{bucket_name}/{prefix}/train',\n",
"    content_type='csv',\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1f0f9555",
"metadata": {},
"outputs": [],
"source": [
"sess = sagemaker.Session()\n",
"\n",
"# Estimator on one ml.m4.xlarge instance; output goes under <prefix>/output in the bucket.\n",
"xgb = sagemaker.estimator.Estimator(\n",
"    image_uri=xgboost_container,\n",
"    role=role,\n",
"    instance_count=1,\n",
"    instance_type='ml.m4.xlarge',\n",
"    output_path=f's3://{bucket_name}/{prefix}/output',\n",
"    sagemaker_session=sess,\n",
")\n",
"\n",
"# Hyperparameters forwarded to the XGBoost training job.\n",
"xgb.set_hyperparameters(\n",
"    objective='binary:logistic',\n",
"    num_round=100,\n",
"    max_depth=5,\n",
"    eta=0.2,\n",
"    gamma=4,\n",
"    min_child_weight=6,\n",
"    subsample=0.8,\n",
"    silent=0,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c306ad50",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2022-01-15 18:10:26 Starting - Starting the training job...\n",
"2022-01-15 18:10:49 Starting - Launching requested ML instancesProfilerReport-1642270226: InProgress\n",
"......\n",
"2022-01-15 18:11:49 Starting - Preparing the instances for training.........\n",
"2022-01-15 18:13:24 Downloading - Downloading input data\n",
"2022-01-15 18:13:24 Training - Downloading the training image...\n",
"2022-01-15 18:13:50 Uploading - Uploading generated training model\u001b[34mArguments: train\u001b[0m\n",
"\u001b[34m[2022-01-15:18:13:45:INFO] Running standalone xgboost training.\u001b[0m\n",
"\u001b[34m[2022-01-15:18:13:45:INFO] Path /opt/ml/input/data/validation does not exist!\u001b[0m\n",
"\u001b[34m[2022-01-15:18:13:45:INFO] File size need to be processed in the node: 3.38mb. Available memory size in the node: 8345.09mb\u001b[0m\n",
"\u001b[34m[2022-01-15:18:13:45:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
"\u001b[34m[18:13:45] S3DistributionType set as FullyReplicated\u001b[0m\n",
"\u001b[34m[18:13:46] 28831x59 matrix with 1701029 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[0]#011train-error:0.100482\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[1]#011train-error:0.099858\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[2]#011train-error:0.099754\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[3]#011train-error:0.099095\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[4]#011train-error:0.098991\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[5]#011train-error:0.099303\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[6]#011train-error:0.099684\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[7]#011train-error:0.09906\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[8]#011train-error:0.098852\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[9]#011train-error:0.098679\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[10]#011train-error:0.098748\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[11]#011train-error:0.098748\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[12]#011train-error:0.098748\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[13]#011train-error:0.09854\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[14]#011train-error:0.098574\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[15]#011train-error:0.098609\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[16]#011train-error:0.098817\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[17]#011train-error:0.098817\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[18]#011train-error:0.098679\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[19]#011train-error:0.098679\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[20]#011train-error:0.098713\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[21]#011train-error:0.098505\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[22]#011train-error:0.098401\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[23]#011train-error:0.098332\u001b[0m\n",
"\u001b[34m[18:13:46] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[24]#011train-error:0.098332\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[25]#011train-error:0.09795\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[26]#011train-error:0.098262\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[27]#011train-error:0.098193\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 24 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[34m[28]#011train-error:0.097985\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[29]#011train-error:0.097499\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[30]#011train-error:0.097638\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[31]#011train-error:0.097395\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[32]#011train-error:0.097222\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[33]#011train-error:0.097118\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[34]#011train-error:0.097014\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[35]#011train-error:0.09684\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[36]#011train-error:0.096667\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[37]#011train-error:0.096736\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[38]#011train-error:0.096563\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[39]#011train-error:0.096355\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 36 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[34m[40]#011train-error:0.096285\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 38 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[41]#011train-error:0.096528\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 16 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[42]#011train-error:0.096355\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 26 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[43]#011train-error:0.096459\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 36 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[44]#011train-error:0.096355\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 34 pruned nodes, max_depth=2\u001b[0m\n",
"\u001b[34m[45]#011train-error:0.096216\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[46]#011train-error:0.096077\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[47]#011train-error:0.0958\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[48]#011train-error:0.095904\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[49]#011train-error:0.095904\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 34 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[50]#011train-error:0.095834\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[51]#011train-error:0.095765\u001b[0m\n",
"\u001b[34m[18:13:47] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[52]#011train-error:0.095904\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[53]#011train-error:0.095834\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[54]#011train-error:0.095834\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[55]#011train-error:0.09573\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[56]#011train-error:0.095626\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[57]#011train-error:0.095696\u001b[0m\n",
"\u001b[34m[58]#011train-error:0.095661\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 24 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[59]#011train-error:0.095592\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[60]#011train-error:0.095522\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 16 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[61]#011train-error:0.095383\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[62]#011train-error:0.095314\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 26 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[63]#011train-error:0.095661\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 32 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[64]#011train-error:0.095661\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[65]#011train-error:0.095418\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 36 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[34m[66]#011train-error:0.095314\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[67]#011train-error:0.095349\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 28 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[34m[68]#011train-error:0.095314\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 28 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[34m[69]#011train-error:0.095314\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 32 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[34m[70]#011train-error:0.095383\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[71]#011train-error:0.095453\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[72]#011train-error:0.095349\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 24 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[73]#011train-error:0.095245\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[74]#011train-error:0.095175\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 18 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[75]#011train-error:0.095071\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[76]#011train-error:0.095175\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[77]#011train-error:0.095002\u001b[0m\n",
"\u001b[34m[18:13:48] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[78]#011train-error:0.095037\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 32 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[79]#011train-error:0.095037\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[80]#011train-error:0.095002\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[81]#011train-error:0.094794\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[82]#011train-error:0.094759\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[83]#011train-error:0.094933\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[84]#011train-error:0.09469\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[85]#011train-error:0.094759\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[86]#011train-error:0.094482\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[87]#011train-error:0.094447\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 20 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[34m[88]#011train-error:0.094482\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[89]#011train-error:0.094378\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 28 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[34m[90]#011train-error:0.094343\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 28 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[34m[91]#011train-error:0.094274\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[92]#011train-error:0.094239\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 32 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[93]#011train-error:0.094169\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[94]#011train-error:0.094169\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[95]#011train-error:0.094204\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 28 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[34m[96]#011train-error:0.094204\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[34m[97]#011train-error:0.093927\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 38 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[34m[98]#011train-error:0.093927\u001b[0m\n",
"\u001b[34m[18:13:49] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 32 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[34m[99]#011train-error:0.093892\u001b[0m\n",
"\n",
"2022-01-15 18:14:10 Completed - Training job completed\n",
"Training seconds: 50\n",
"Billable seconds: 50\n"
]
}
],
"source": [
"# Launch the training job with the uploaded CSV data supplied under the 'train' key.\n",
"xgb.fit(inputs={'train': s3_input_train})\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "47e81895",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------!"
]
}
],
"source": [
"# Host the trained model behind a SageMaker endpoint backed by one ml.m4.xlarge instance.\n",
"xgb_predictor = xgb.deploy(\n",
"    initial_instance_count=1,\n",
"    instance_type='ml.m4.xlarge',\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "61ab1a1f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(12357,)\n"
]
}
],
"source": [
"from sagemaker.serializers import CSVSerializer\n",
"\n",
"# Feature matrix for the held-out rows (both one-hot label columns removed).\n",
"test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).values\n",
"# Send the rows to the endpoint as CSV text.\n",
"xgb_predictor.serializer = CSVSerializer()\n",
"# The endpoint answers with bytes; decode them to text.\n",
"predictions = xgb_predictor.predict(test_data_array).decode('utf-8')\n",
"# Parse the complete comma-separated reply. Slicing off the first character would\n",
"# eat the leading digit of the first score (e.g. '9.5e-05' would become '.5e-05').\n",
"predictions_array = np.fromstring(predictions, sep=',')\n",
"print(predictions_array.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "7849d565",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Overall Classification Rate: 89.5%\n",
"\n",
"Predicted No Purchase Purchase\n",
"Observed\n",
"No Purchase 90% (10769) 37% (167)\n",
"Purchase 10% (1133) 63% (288) \n",
"\n"
]
}
],
"source": [
"# Confusion matrix: rows are the observed labels, columns the rounded predictions.\n",
"cm = pd.crosstab(\n",
"    index=test_data['y_yes'],\n",
"    columns=np.round(predictions_array),\n",
"    rownames=['Observed'],\n",
"    colnames=['Predicted'],\n",
")\n",
"# Cell counts, addressed as [observed, predicted].\n",
"tn, fn = cm.iloc[0, 0], cm.iloc[1, 0]\n",
"tp, fp = cm.iloc[1, 1], cm.iloc[0, 1]\n",
"# Percentage of rows whose rounded prediction equals the observed label.\n",
"p = (tp + tn) / (tp + tn + fp + fn) * 100\n",
"\n",
"print(\"\\n{0:<20}{1:<4.1f}%\\n\".format(\"Overall Classification Rate: \", p))\n",
"print(\"{0:<15}{1:<15}{2:>8}\".format(\"Predicted\", \"No Purchase\", \"Purchase\"))\n",
"print(\"Observed\")\n",
"# Percentages below are taken within each predicted column.\n",
"print(\n",
"    \"{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})\".format(\n",
"        \"No Purchase\", tn / (tn + fn) * 100, tn, fp / (tp + fp) * 100, fp\n",
"    )\n",
")\n",
"print(\n",
"    \"{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \\n\".format(\n",
"        \"Purchase\", fn / (tn + fn) * 100, fn, tp / (tp + fp) * 100, tp\n",
"    )\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "07a1dea3",
"metadata": {},
"outputs": [],
"source": [
"# Clean up: remove the endpoint together with its endpoint configuration.\n",
"xgb_predictor.delete_endpoint(delete_endpoint_config=True)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "4fa843d7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'ResponseMetadata': {'RequestId': 'JZWMR507YKVX53WJ',\n",
" 'HostId': '0dgkgF6qUlnrjiIF7Ui5uFuSp5oG4a6WZvir6Jny/fyuw9yEuOPp4FpbeLB04VXAII8CF33YxJk=',\n",
" 'HTTPStatusCode': 200,\n",
" 'HTTPHeaders': {'x-amz-id-2': '0dgkgF6qUlnrjiIF7Ui5uFuSp5oG4a6WZvir6Jny/fyuw9yEuOPp4FpbeLB04VXAII8CF33YxJk=',\n",
" 'x-amz-request-id': 'JZWMR507YKVX53WJ',\n",
" 'date': 'Sat, 15 Jan 2022 18:35:01 GMT',\n",
" 'content-type': 'application/xml',\n",
" 'transfer-encoding': 'chunked',\n",
" 'server': 'AmazonS3',\n",
" 'connection': 'close'},\n",
" 'RetryAttempts': 0},\n",
" 'Deleted': [{'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/profiler-output/system/training_job_end.ts'},\n",
" {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/profiler-output/system/incremental/2022011518/1642270380.algo-1.json'},\n",
" {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/output/model.tar.gz'},\n",
" {'Key': 'sagemaker/DEMO-xgboost-dm/train/train.csv'},\n",
" {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2022-01-15-18-10-26-006/profiler-output/framework/training_job_end.ts'}]}]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Clean up: delete every object stored in the bucket (the bucket itself is kept).\n",
"bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)\n",
"bucket_to_delete.objects.all().delete()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c28d64b4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment