Skip to content

Instantly share code, notes, and snippets.

@sabman
Created November 2, 2018 12:02
Show Gist options
  • Save sabman/6a355d4c4738079c5c8f4938fb3e9af9 to your computer and use it in GitHub Desktop.
Save sabman/6a355d4c4738079c5c8f4938fb3e9af9 to your computer and use it in GitHub Desktop.
Sagemaker-Notebook for Bank Data
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Success - the MySageMakerInstance is in the eu-central-1 region. You will use the 813361260812.dkr.ecr.eu-central-1.amazonaws.com/xgboost:latest container for your SageMaker endpoint.\n"
]
}
],
"source": [
"# import libraries\n",
"import boto3, re, sys, math, json, os, sagemaker, urllib.request\n",
"from sagemaker import get_execution_role\n",
"import numpy as np \n",
"import pandas as pd \n",
"import matplotlib.pyplot as plt \n",
"from IPython.display import Image \n",
"from IPython.display import display \n",
"from time import gmtime, strftime \n",
"from sagemaker.predictor import csv_serializer \n",
"\n",
"# Define IAM role\n",
"role = get_execution_role()\n",
"prefix = 'sagemaker/DEMO-xgboost-dm'\n",
"containers = {'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',\n",
" 'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',\n",
" 'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',\n",
" 'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',\n",
" 'eu-central-1': '813361260812.dkr.ecr.eu-central-1.amazonaws.com/xgboost:latest'\n",
" } # each region has its XGBoost container\n",
"my_region = boto3.session.Session().region_name # set the region of the instance\n",
"print(\"Success - the MySageMakerInstance is in the \" + my_region + \" region. You will use the \" + containers[my_region] + \" container for your SageMaker endpoint.\")\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"S3 bucket created successfully\n"
]
}
],
"source": [
"bucket_name = 'sagemaker-fusce' # <--- change this variable to a unique name for your bucket\n",
"s3 = boto3.resource('s3')\n",
"try:\n",
" if my_region == 'us-east-1':\n",
" s3.create_bucket(Bucket=bucket_name)\n",
" else: \n",
" s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={ 'LocationConstraint': my_region })\n",
" print('S3 bucket created successfully')\n",
"except Exception as e:\n",
" print('S3 error: ',e)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Success: downloaded bank_clean.csv.\n",
"Success: Data loaded into dataframe.\n"
]
}
],
"source": [
"try:\n",
" urllib.request.urlretrieve (\"https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv\", \"bank_clean.csv\")\n",
" print('Success: downloaded bank_clean.csv.')\n",
"except Exception as e:\n",
" print('Data load error: ',e)\n",
"\n",
"try:\n",
" model_data = pd.read_csv('./bank_clean.csv',index_col=0)\n",
" print('Success: Data loaded into dataframe.')\n",
"except Exception as e:\n",
" print('Data load error: ',e)\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(28831, 61) (12357, 61)\n"
]
}
],
"source": [
"train_data, test_data = np.split(model_data.sample(frac=1, random_state=1729), [int(0.7 * len(model_data))])\n",
"print(train_data.shape, test_data.shape)\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"pd.concat([train_data['y_yes'], train_data.drop(['y_no', 'y_yes'], axis=1)], axis=1).to_csv('train.csv', index=False, header=False)\n",
"boto3.Session().resource('s3').Bucket(bucket_name).Object(os.path.join(prefix, 'train/train.csv')).upload_file('train.csv')\n",
"s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"sess = sagemaker.Session()\n",
"xgb = sagemaker.estimator.Estimator(containers[my_region],role, train_instance_count=1, train_instance_type='ml.m4.xlarge',output_path='s3://{}/{}/output'.format(bucket_name, prefix),sagemaker_session=sess)\n",
"xgb.set_hyperparameters(max_depth=5,eta=0.2,gamma=4,min_child_weight=6,subsample=0.8,silent=0,objective='binary:logistic',num_round=100)\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sagemaker:Creating training-job with name: xgboost-2018-11-02-09-23-03-633\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"2018-11-02 09:23:03 Starting - Starting the training job...\n",
"2018-11-02 09:23:12 Starting - Launching requested ML instances......\n",
"2018-11-02 09:24:12 Starting - Preparing the instances for training......\n",
"2018-11-02 09:25:10 Downloading - Downloading input data\n",
"2018-11-02 09:25:10 Training - Downloading the training image.\n",
"\u001b[31mArguments: train\u001b[0m\n",
"\u001b[31m[2018-11-02:09:25:37:INFO] Running standalone xgboost training.\u001b[0m\n",
"\u001b[31m[2018-11-02:09:25:37:INFO] Path /opt/ml/input/data/validation does not exist!\u001b[0m\n",
"\u001b[31m[2018-11-02:09:25:37:INFO] File size need to be processed in the node: 3.38mb. Available memory size in the node: 8553.16mb\u001b[0m\n",
"\u001b[31m[2018-11-02:09:25:37:INFO] Determined delimiter of CSV input is ','\u001b[0m\n",
"\u001b[31m[09:25:37] S3DistributionType set as FullyReplicated\u001b[0m\n",
"\u001b[31m[09:25:37] 28831x59 matrix with 1701029 entries loaded from /opt/ml/input/data/train?format=csv&label_column=0&delimiter=,\u001b[0m\n",
"\u001b[31m[09:25:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[0]#011train-error:0.100482\u001b[0m\n",
"\u001b[31m[09:25:37] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[1]#011train-error:0.099858\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[2]#011train-error:0.099476\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[3]#011train-error:0.099025\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[4]#011train-error:0.099476\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[5]#011train-error:0.099372\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[6]#011train-error:0.09906\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[7]#011train-error:0.099025\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[8]#011train-error:0.099164\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[9]#011train-error:0.098817\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[10]#011train-error:0.098817\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[11]#011train-error:0.098817\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[12]#011train-error:0.098852\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[13]#011train-error:0.098574\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[14]#011train-error:0.098609\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[15]#011train-error:0.098401\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 26 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[16]#011train-error:0.098401\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[17]#011train-error:0.098297\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[18]#011train-error:0.098054\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[19]#011train-error:0.098158\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[20]#011train-error:0.098193\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 36 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[21]#011train-error:0.098193\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 28 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[22]#011train-error:0.098124\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[23]#011train-error:0.098124\u001b[0m\n",
"\u001b[31m[09:25:38] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[24]#011train-error:0.097881\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 34 extra nodes, 4 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[25]#011train-error:0.097777\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[26]#011train-error:0.097742\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[27]#011train-error:0.097707\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[28]#011train-error:0.097291\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 32 extra nodes, 4 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[29]#011train-error:0.097152\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 6 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[30]#011train-error:0.097256\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[31]#011train-error:0.097083\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[32]#011train-error:0.097083\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[33]#011train-error:0.097083\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[34]#011train-error:0.097152\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[35]#011train-error:0.097256\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 22 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[36]#011train-error:0.097187\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 26 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[37]#011train-error:0.097118\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[38]#011train-error:0.097152\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[39]#011train-error:0.096736\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 14 pruned nodes, max_depth=2\u001b[0m\n",
"\u001b[31m[40]#011train-error:0.09691\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[41]#011train-error:0.096736\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[42]#011train-error:0.096771\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 12 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[43]#011train-error:0.096806\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[44]#011train-error:0.096736\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 14 pruned nodes, max_depth=2\u001b[0m\n",
"\u001b[31m[45]#011train-error:0.096806\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[46]#011train-error:0.096459\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 26 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[47]#011train-error:0.096424\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 16 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[31m[48]#011train-error:0.096528\u001b[0m\n",
"\u001b[31m[09:25:39] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[49]#011train-error:0.096563\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 38 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[50]#011train-error:0.096597\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[51]#011train-error:0.096528\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[52]#011train-error:0.096112\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[53]#011train-error:0.096077\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[54]#011train-error:0.09632\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 16 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[31m[55]#011train-error:0.09632\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 30 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[31m[56]#011train-error:0.096147\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[57]#011train-error:0.09632\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[58]#011train-error:0.096112\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 34 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[59]#011train-error:0.096042\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 28 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[60]#011train-error:0.096008\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 26 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[61]#011train-error:0.096042\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[62]#011train-error:0.096077\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 30 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[31m[63]#011train-error:0.096147\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 18 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[64]#011train-error:0.096216\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[65]#011train-error:0.09632\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 10 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[66]#011train-error:0.096181\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 20 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[31m[67]#011train-error:0.095904\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 16 extra nodes, 24 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[68]#011train-error:0.096008\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 26 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[69]#011train-error:0.096042\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 22 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[70]#011train-error:0.096077\u001b[0m\n",
"\u001b[31m[09:25:40] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 8 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[71]#011train-error:0.095938\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 30 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[72]#011train-error:0.095938\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 28 pruned nodes, max_depth=2\u001b[0m\n",
"\u001b[31m[73]#011train-error:0.096008\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 12 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[74]#011train-error:0.095869\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 34 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[75]#011train-error:0.095938\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 24 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[76]#011train-error:0.095904\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[77]#011train-error:0.0958\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 14 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[31m[78]#011train-error:0.09573\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 18 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[31m[79]#011train-error:0.095765\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[80]#011train-error:0.095834\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 20 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[81]#011train-error:0.095592\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 22 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[31m[82]#011train-error:0.095557\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 26 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[31m[83]#011train-error:0.095557\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 32 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[84]#011train-error:0.095453\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 8 pruned nodes, max_depth=4\u001b[0m\n",
"\u001b[31m[85]#011train-error:0.095453\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 24 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[86]#011train-error:0.095453\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 24 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[87]#011train-error:0.095453\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 20 extra nodes, 16 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[88]#011train-error:0.095349\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 30 pruned nodes, max_depth=2\u001b[0m\n",
"\u001b[31m[89]#011train-error:0.095037\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 14 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[90]#011train-error:0.095106\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 42 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[91]#011train-error:0.095037\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 30 pruned nodes, max_depth=0\u001b[0m\n",
"\u001b[31m[92]#011train-error:0.095106\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 14 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[93]#011train-error:0.095314\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 24 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[94]#011train-error:0.095314\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 24 pruned nodes, max_depth=3\u001b[0m\n",
"\u001b[31m[95]#011train-error:0.095314\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 30 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[96]#011train-error:0.095279\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[97]#011train-error:0.094828\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 22 pruned nodes, max_depth=2\u001b[0m\n",
"\u001b[31m[98]#011train-error:0.094863\u001b[0m\n",
"\u001b[31m[09:25:41] src/tree/updater_prune.cc:74: tree pruning end, 1 roots, 30 extra nodes, 12 pruned nodes, max_depth=5\u001b[0m\n",
"\u001b[31m[99]#011train-error:0.094759\u001b[0m\n",
"\n",
"2018-11-02 09:25:49 Uploading - Uploading generated training model\n",
"2018-11-02 09:25:49 Completed - Training job completed\n",
"Billable seconds: 45\n"
]
}
],
"source": [
"xgb.fit({'train': s3_input_train})\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sagemaker:Creating model with name: xgboost-2018-11-02-11-18-41-698\n",
"INFO:sagemaker:Creating endpoint with name xgboost-2018-11-02-09-23-03-633\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"--------------------------------------------------------------!"
]
}
],
"source": [
"xgb_predictor = xgb.deploy(initial_instance_count=1,instance_type='ml.t2.medium')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(12357,)\n"
]
}
],
"source": [
"test_data_array = test_data.drop(['y_no', 'y_yes'], axis=1).as_matrix() #load the data into an array\n",
"xgb_predictor.content_type = 'text/csv' # set the data type for an inference\n",
"xgb_predictor.serializer = csv_serializer # set the serializer type\n",
"predictions = xgb_predictor.predict(test_data_array).decode('utf-8') # predict!\n",
"predictions_array = np.fromstring(predictions[1:], sep=',') # and turn the prediction into an array\n",
"print(predictions_array.shape)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Overall Classification Rate: 89.5%\n",
"\n",
"Predicted No Purchase Purchase\n",
"Observed\n",
"No Purchase 90% (10785) 35% (151)\n",
"Purchase 10% (1143) 65% (278) \n",
"\n"
]
}
],
"source": [
"cm = pd.crosstab(index=test_data['y_yes'], columns=np.round(predictions_array), rownames=['Observed'], colnames=['Predicted'])\n",
"tn = cm.iloc[0,0]; fn = cm.iloc[1,0]; tp = cm.iloc[1,1]; fp = cm.iloc[0,1]; p = (tp+tn)/(tp+tn+fp+fn)*100\n",
"print(\"\\n{0:<20}{1:<4.1f}%\\n\".format(\"Overall Classification Rate: \", p))\n",
"print(\"{0:<15}{1:<15}{2:>8}\".format(\"Predicted\", \"No Purchase\", \"Purchase\"))\n",
"print(\"Observed\")\n",
"print(\"{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})\".format(\"No Purchase\", tn/(tn+fn)*100,tn, fp/(tp+fp)*100, fp))\n",
"print(\"{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \\n\".format(\"Purchase\", fn/(tn+fn)*100,fn, tp/(tp+fp)*100, tp))\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:sagemaker:Deleting endpoint with name: xgboost-2018-11-02-09-23-03-633\n"
]
},
{
"data": {
"text/plain": [
"[{'ResponseMetadata': {'RequestId': '1AF9B148A4DFF8A9',\n",
" 'HostId': 'sMBylANeXsyTeBJZsc0qMH2IbZUe8f+d9DqABrzBpwMNbLFsrhN4l0qW6qvJQqg1uHXapXnm64I=',\n",
" 'HTTPStatusCode': 200,\n",
" 'HTTPHeaders': {'x-amz-id-2': 'sMBylANeXsyTeBJZsc0qMH2IbZUe8f+d9DqABrzBpwMNbLFsrhN4l0qW6qvJQqg1uHXapXnm64I=',\n",
" 'x-amz-request-id': '1AF9B148A4DFF8A9',\n",
" 'date': 'Fri, 02 Nov 2018 12:01:07 GMT',\n",
" 'connection': 'close',\n",
" 'content-type': 'application/xml',\n",
" 'transfer-encoding': 'chunked',\n",
" 'server': 'AmazonS3'},\n",
" 'RetryAttempts': 0},\n",
" 'Deleted': [{'Key': 'sagemaker/DEMO-xgboost-dm/train/train.csv'},\n",
" {'Key': 'sagemaker/DEMO-xgboost-dm/output/xgboost-2018-11-02-09-23-03-633/output/model.tar.gz'}]}]"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sagemaker.Session().delete_endpoint(xgb_predictor.endpoint)\n",
"bucket_to_delete = boto3.resource('s3').Bucket(bucket_name)\n",
"bucket_to_delete.objects.all().delete()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "conda_python3",
"language": "python",
"name": "conda_python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment