This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# End-to-end sketch: serialize training data to RecordIO-protobuf, stage it in
# S3, and launch a SageMaker linear-learner training job on it. Lines that
# consist only of `...` stand for tutorial code elided from this excerpt
# (imports of io/boto3, creation of `features`, `labels`, `session`, etc.).

# i. Download sample data and extract features and label (fraud/nonfraud).
...
# ii. Convert the n-dimensional arrays into RecordIO format (a highly efficient data format).
import sagemaker.amazon.common as smac
buf = io.BytesIO()
smac.write_numpy_to_dense_tensor(buf, features, labels)
...
# iii. Store the RecordIO data into S3 bucket.
bucket = "fraud-detection-end-to-end-demo"
prefix = 'linear-learner'
key = 'recordio-pb-data'
# S3 keys always use '/' as the separator; os.path.join would produce '\' on
# Windows and the uploaded key would no longer match the training URI below.
train_key = '{}/train/{}'.format(prefix, key)
# Rewind before uploading: after the write above the stream position sits at
# the end of the buffer, and the upload reads from the current position, so
# without this an empty object would be stored.
buf.seek(0)
boto3.resource('s3').Bucket(bucket).Object(train_key).upload_fileobj(buf)
...
# iv. Retrieve the Docker image for the linear learner algorithm.
container = get_image_uri(boto3.Session().region_name, 'linear-learner')
...
# v. Create a training job with the desired instance type and instance count,
#    change the (hyper)parameters of the algorithm and start training using the
#    training data uploaded to S3 earlier. You can see how simple it is to set
#    up a cluster of servers to train a model and only pay for the time that it
#    takes to train, a major cost saver.
import sagemaker
# Built from the same key used for the upload so the two cannot drift apart.
s3_train_data = 's3://{}/{}'.format(bucket, train_key)
output_location = 's3://{}/{}/output'.format(bucket, prefix)
linear = sagemaker.estimator.Estimator(container,
                                       get_execution_role(),
                                       train_instance_count=1,
                                       train_instance_type='ml.c4.xlarge',
                                       output_path=output_location,
                                       sagemaker_session=session)
# feature_dim is taken from the number of columns in the feature matrix.
linear.set_hyperparameters(feature_dim=features.shape[1],
                           predictor_type='binary_classifier',
                           mini_batch_size=200)
# Starts the training job using the 'train' channel pointed at the S3 data.
linear.fit({'train': s3_train_data})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment