ozydingo/sagemaker_demo.py

## sagemaker_demo.py
#!/usr/bin/env python
# coding: utf-8

# In[1]:


# import libraries
import boto3, re, sys, math, json, os, sagemaker, urllib.request
from sagemaker import get_execution_role
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import Image
from IPython.display import display
from time import gmtime, strftime
from sagemaker.predictor import csv_serializer

# Define IAM role
role = get_execution_role()
# S3 key prefix under which this demo stores its training data and model output
prefix = 'sagemaker/DEMO-xgboost-dm'
# Each region has its own XGBoost container image; only these regions are mapped here
containers = {
    'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',
    'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',
    'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
    'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',
}
my_region = boto3.session.Session().region_name # set the region of the instance
# Fail early with an actionable message: an unconfigured region (None) or a region
# without a mapped container would otherwise surface as a bare TypeError/KeyError.
if my_region not in containers:
    raise KeyError("No XGBoost container is configured for region {!r}; supported regions: {}".format(my_region, ', '.join(sorted(containers))))
print("Success - the MySageMakerInstance is in the " + my_region + " region. You will use the " + containers[my_region] + " container for your SageMaker endpoint.")


# In[2]:

# Create the S3 bucket used by this demo
bucket_name = 'andrew-brain-power-sagemaker-demo' # <--- change this variable to a unique name for your bucket
s3 = boto3.resource('s3')
# Only regions other than us-east-1 get an explicit LocationConstraint
create_args = {'Bucket': bucket_name}
if my_region != 'us-east-1':
    create_args['CreateBucketConfiguration'] = {'LocationConstraint': my_region}
try:
    s3.create_bucket(**create_args)
    print('S3 bucket created successfully')
except Exception as err:
    # Best effort: report the failure and let the script continue
    print('S3 error: ',err)


# In[3]:

# Fetch the demo dataset into the current working directory
data_url = "https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv"
try:
    urllib.request.urlretrieve(data_url, "bank_clean.csv")
    print('Success: downloaded bank_clean.csv.')
except Exception as err:
    print('Data load error: ',err)

# Read the downloaded CSV into a DataFrame, using its first column as the index
try:
    model_data = pd.read_csv('./bank_clean.csv', index_col=0)
    print('Success: Data loaded into dataframe.')
except Exception as err:
    print('Data load error: ',err)


# In[12]:

# Split data into train (first 70% of the shuffled rows) and test (remaining rows) sets.
# Positional slicing with .iloc keeps both parts as DataFrames; np.split on a DataFrame
# goes through DataFrame.swapaxes, which recent pandas releases deprecate.
shuffled_data = model_data.sample(frac=1, random_state=1729) # fixed seed => reproducible shuffle
split_at = int(0.7 * len(model_data))
train_data, test_data = shuffled_data.iloc[:split_at], shuffled_data.iloc[split_at:]
print(train_data.shape, test_data.shape)
print(train_data.describe())


# In[13]:

# Format and upload data to s3
# The y_yes column is written first, followed by the remaining columns (y_no dropped);
# neither the index nor a header row is written.
train_features = train_data.drop(['y_no', 'y_yes'], axis=1)
pd.concat([train_data['y_yes'], train_features], axis=1).to_csv('train.csv', index=False, header=False)
# S3 keys always use '/', so build the key explicitly rather than with os.path.join,
# which would insert the local OS separator (a backslash on Windows).
train_key = '{}/train/train.csv'.format(prefix)
boto3.Session().resource('s3').Bucket(bucket_name).Object(train_key).upload_file('train.csv')
s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')


# In[14]:

# Create sagemaker session and configure the estimator
sess = sagemaker.Session()
# Model artifacts are written under <prefix>/output in the demo bucket
output_location = 's3://{}/{}/output'.format(bucket_name, prefix)
xgb = sagemaker.estimator.Estimator(
    containers[my_region],
    role,
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path=output_location,
    sagemaker_session=sess,
)
hyperparameters = dict(
    max_depth=5,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.8,
    silent=0,
    objective='binary:logistic',
    num_round=100,
)
xgb.set_hyperparameters(**hyperparameters)


# In[15]:

# fit the model on the 'train' channel
training_channels = {'train': s3_input_train}
xgb.fit(training_channels)


# In[16]:

# deploy a prediction server
xgb_predictor = xgb.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)


# In[17]:

# Prepare test data, predict
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same ndarray
test_features = test_data.drop(['y_no', 'y_yes'], axis=1)
test_data_array = test_features.values #load the data into an array
xgb_predictor.content_type = 'text/csv' # set the data type for an inference
xgb_predictor.serializer = csv_serializer # set the serializer type
predictions = xgb_predictor.predict(test_data_array).decode('utf-8') # predict!
# NOTE(review): the first character of the response is skipped before parsing;
# confirm this matches the endpoint's response format.
predictions_array = np.fromstring(predictions[1:], sep=',') # and turn the prediction into an array
print(predictions_array.shape)


# In[32]:

# Validate results
# Cross-tabulate observed labels (rows) against rounded predictions (columns)
cm = pd.crosstab(
    index=test_data['y_yes'],
    columns=np.round(predictions_array),
    rownames=['Observed'],
    colnames=['Predicted'],
)
tn = cm.iloc[0,0]
fn = cm.iloc[1,0]
tp = cm.iloc[1,1]
fp = cm.iloc[0,1]
# Share of all test rows that lie on the diagonal of the table, as a percentage
p = (tp+tn)/(tp+tn+fp+fn)*100
# Column totals: each cell below is reported as a percentage of its predicted-class column
predicted_no = tn+fn
predicted_yes = tp+fp
print("\n{0:<20}{1:<4.1f}%\n".format("Overall Classification Rate: ", p))
print("{0:<15}{1:<15}{2:>8}".format("Predicted", "No Purchase", "Purchase"))
print("Observed")
print("{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})".format("No Purchase", tn/predicted_no*100, tn, fp/predicted_yes*100, fp))
print("{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \n".format("Purchase", fn/predicted_no*100, fn, tp/predicted_yes*100, tp))


# In[ ]:


### Clean up resources:

# Tear down the prediction endpoint
cleanup_session = sagemaker.Session()
cleanup_session.delete_endpoint(xgb_predictor.endpoint)
# Delete every object in the demo bucket (the bucket itself is not deleted here)
s3_resource = boto3.resource('s3')
bucket_to_delete = s3_resource.Bucket(bucket_name)
bucket_to_delete.objects.all().delete()
	#!/usr/bin/env python
	# coding: utf-8

	# In[1]:


	# import libraries
	import boto3, re, sys, math, json, os, sagemaker, urllib.request
	from sagemaker import get_execution_role
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from IPython.display import Image
	from IPython.display import display
	from time import gmtime, strftime
	from sagemaker.predictor import csv_serializer

	# Define IAM role
	role = get_execution_role()
	# S3 key prefix under which this demo stores its training data and model output
	prefix = 'sagemaker/DEMO-xgboost-dm'
	# Each region has its own XGBoost container image; only these regions are mapped here
	containers = {
	    'us-west-2': '433757028032.dkr.ecr.us-west-2.amazonaws.com/xgboost:latest',
	    'us-east-1': '811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest',
	    'us-east-2': '825641698319.dkr.ecr.us-east-2.amazonaws.com/xgboost:latest',
	    'eu-west-1': '685385470294.dkr.ecr.eu-west-1.amazonaws.com/xgboost:latest',
	}
	my_region = boto3.session.Session().region_name # set the region of the instance
	# Fail early with an actionable message: an unconfigured region (None) or a region
	# without a mapped container would otherwise surface as a bare TypeError/KeyError.
	if my_region not in containers:
	    raise KeyError("No XGBoost container is configured for region {!r}; supported regions: {}".format(my_region, ', '.join(sorted(containers))))
	print("Success - the MySageMakerInstance is in the " + my_region + " region. You will use the " + containers[my_region] + " container for your SageMaker endpoint.")


	# In[2]:

	# Create the S3 bucket used by this demo
	bucket_name = 'andrew-brain-power-sagemaker-demo' # <--- change this variable to a unique name for your bucket
	s3 = boto3.resource('s3')
	try:
	    # Only regions other than us-east-1 get an explicit LocationConstraint
	    if my_region == 'us-east-1':
	        s3.create_bucket(Bucket=bucket_name)
	    else:
	        s3.create_bucket(Bucket=bucket_name, CreateBucketConfiguration={ 'LocationConstraint': my_region })
	    print('S3 bucket created successfully')
	except Exception as e:
	    # Best effort: report the failure and let the script continue
	    print('S3 error: ',e)


	# In[3]:

	# Fetch the demo dataset into the current working directory
	try:
	    urllib.request.urlretrieve("https://d1.awsstatic.com/tmt/build-train-deploy-machine-learning-model-sagemaker/bank_clean.27f01fbbdf43271788427f3682996ae29ceca05d.csv", "bank_clean.csv")
	    print('Success: downloaded bank_clean.csv.')
	except Exception as e:
	    print('Data load error: ',e)

	# Read the downloaded CSV into a DataFrame, using its first column as the index
	try:
	    model_data = pd.read_csv('./bank_clean.csv',index_col=0)
	    print('Success: Data loaded into dataframe.')
	except Exception as e:
	    print('Data load error: ',e)


	# In[12]:

	# Split data into train (first 70% of the shuffled rows) and test (remaining rows) sets.
	# Positional slicing with .iloc keeps both parts as DataFrames; np.split on a DataFrame
	# goes through DataFrame.swapaxes, which recent pandas releases deprecate.
	shuffled_data = model_data.sample(frac=1, random_state=1729) # fixed seed => reproducible shuffle
	split_at = int(0.7 * len(model_data))
	train_data, test_data = shuffled_data.iloc[:split_at], shuffled_data.iloc[split_at:]
	print(train_data.shape, test_data.shape)
	print(train_data.describe())


	# In[13]:

	# Format and upload data to s3
	# The y_yes column is written first, followed by the remaining columns (y_no dropped);
	# neither the index nor a header row is written.
	train_features = train_data.drop(['y_no', 'y_yes'], axis=1)
	pd.concat([train_data['y_yes'], train_features], axis=1).to_csv('train.csv', index=False, header=False)
	# S3 keys always use '/', so build the key explicitly rather than with os.path.join,
	# which would insert the local OS separator (a backslash on Windows).
	train_key = '{}/train/train.csv'.format(prefix)
	boto3.Session().resource('s3').Bucket(bucket_name).Object(train_key).upload_file('train.csv')
	s3_input_train = sagemaker.s3_input(s3_data='s3://{}/{}/train'.format(bucket_name, prefix), content_type='csv')


	# In[14]:

	# Create sagemaker session and configure the estimator
	sess = sagemaker.Session()
	# Model artifacts are written under <prefix>/output in the demo bucket
	output_location = 's3://{}/{}/output'.format(bucket_name, prefix)
	xgb = sagemaker.estimator.Estimator(
	    containers[my_region],
	    role,
	    train_instance_count=1,
	    train_instance_type='ml.m4.xlarge',
	    output_path=output_location,
	    sagemaker_session=sess,
	)
	hyperparameters = dict(
	    max_depth=5,
	    eta=0.2,
	    gamma=4,
	    min_child_weight=6,
	    subsample=0.8,
	    silent=0,
	    objective='binary:logistic',
	    num_round=100,
	)
	xgb.set_hyperparameters(**hyperparameters)


	# In[15]:

	# fit the model on the 'train' channel
	training_channels = {'train': s3_input_train}
	xgb.fit(training_channels)


	# In[16]:

	# deploy a prediction server
	xgb_predictor = xgb.deploy(
	    initial_instance_count=1,
	    instance_type='ml.m4.xlarge',
	)


	# In[17]:

	# Prepare test data, predict
	# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same ndarray
	test_features = test_data.drop(['y_no', 'y_yes'], axis=1)
	test_data_array = test_features.values #load the data into an array
	xgb_predictor.content_type = 'text/csv' # set the data type for an inference
	xgb_predictor.serializer = csv_serializer # set the serializer type
	predictions = xgb_predictor.predict(test_data_array).decode('utf-8') # predict!
	# NOTE(review): the first character of the response is skipped before parsing;
	# confirm this matches the endpoint's response format.
	predictions_array = np.fromstring(predictions[1:], sep=',') # and turn the prediction into an array
	print(predictions_array.shape)


	# In[32]:

	# Validate results
	# Cross-tabulate observed labels (rows) against rounded predictions (columns)
	cm = pd.crosstab(index=test_data['y_yes'], columns=np.round(predictions_array), rownames=['Observed'], colnames=['Predicted'])
	tn = cm.iloc[0,0]
	fn = cm.iloc[1,0]
	tp = cm.iloc[1,1]
	fp = cm.iloc[0,1]
	# Share of all test rows that lie on the diagonal of the table, as a percentage
	p = (tp+tn)/(tp+tn+fp+fn)*100
	print("\n{0:<20}{1:<4.1f}%\n".format("Overall Classification Rate: ", p))
	print("{0:<15}{1:<15}{2:>8}".format("Predicted", "No Purchase", "Purchase"))
	print("Observed")
	# Each cell is scaled to a percentage of its predicted-class column total, matching the *100 used for p
	print("{0:<15}{1:<2.0f}% ({2:<}){3:>6.0f}% ({4:<})".format("No Purchase", tn/(tn+fn)*100,tn, fp/(tp+fp)*100, fp))
	print("{0:<16}{1:<1.0f}% ({2:<}){3:>7.0f}% ({4:<}) \n".format("Purchase", fn/(tn+fn)*100,fn, tp/(tp+fp)*100, tp))


	# In[ ]:


	### Clean up resources:

	# Tear down the prediction endpoint
	cleanup_session = sagemaker.Session()
	cleanup_session.delete_endpoint(xgb_predictor.endpoint)
	# Delete every object in the demo bucket (the bucket itself is not deleted here)
	s3_resource = boto3.resource('s3')
	bucket_to_delete = s3_resource.Bucket(bucket_name)
	bucket_to_delete.objects.all().delete()