Skip to content

Instantly share code, notes, and snippets.

@patrickbrus
Created January 10, 2021 18:22
Show Gist options
  • Save patrickbrus/6f87749cad1ff15e809fea28435a0edc to your computer and use it in GitHub Desktop.
Save patrickbrus/6f87749cad1ff15e809fea28435a0edc to your computer and use it in GitHub Desktop.
Code for uploading data to an S3 bucket
import boto3
import sagemaker
import os
# --- SageMaker / AWS context ---
# Session object through which all SageMaker and S3 calls below are made.
sagemaker_session = sagemaker.Session()
# Execution role as reported by the SageMaker SDK for this environment.
role = sagemaker.get_execution_role()
# Region configured on the default boto3 session.
region = boto3.Session().region_name
# Default S3 bucket of the session (per the SageMaker SDK docs it is created
# on first use if it does not exist yet).
bucket = sagemaker_session.default_bucket()

# --- Upload configuration ---
# Local folder that holds the train / validation / test CSV files.
data_dir = "data_converted"
# Key prefix (a descriptive "directory" name) for the objects inside the bucket.
prefix = "retail_data_analytics"


def _upload_csv(filename):
    """Upload ``data_dir/filename`` to ``bucket`` under ``prefix``.

    Returns whatever ``sagemaker_session.upload_data`` returns (documented by
    the SageMaker SDK as the S3 URI of the uploaded file).
    """
    local_path = os.path.join(data_dir, filename)
    return sagemaker_session.upload_data(
        path=local_path,
        bucket=bucket,
        key_prefix=prefix,
    )


# Push the three data splits to S3, one call per file, in a fixed order.
train_input = _upload_csv("train.csv")
val_input = _upload_csv("val.csv")
test_input = _upload_csv("test.csv")
# Confirm that data is in the S3 bucket: collect and print every object key,
# then fail loudly if nothing was found.
# NOTE(review): `.objects.all()` walks the whole bucket, not only the keys
# under `prefix`, so this proves the bucket is non-empty rather than that the
# three files uploaded above are present.
empty_check = []
for obj in boto3.resource('s3').Bucket(bucket).objects.all():
    # Both statements belong to the loop body: record the key and echo it.
    empty_check.append(obj.key)
    print(obj.key)

# An empty list means no object was listed in the bucket.
assert empty_check, 'S3 bucket is empty.'
print('Test passed!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment