Skip to content

Instantly share code, notes, and snippets.

@bpatoul
Last active July 3, 2022 07:14
Show Gist options
  • Save bpatoul/254331c91c4ac764b7d3bec4d6ffec5a to your computer and use it in GitHub Desktop.
Save bpatoul/254331c91c4ac764b7d3bec4d6ffec5a to your computer and use it in GitHub Desktop.
# Submit a processing job with two inputs (an Athena-defined dataset and a
# code prefix in S3) and one S3 output; the call's return value is kept in
# `response`.
# NOTE(review): `client` is not defined in this snippet -- presumably a boto3
# SageMaker client created earlier; confirm before running.

# Input 'data': described by an Athena dataset definition (query against
# "link"."dl_tpc_customer", OutputFormat PARQUET) with a container-side
# LocalPath of /opt/ml/processing/input/dataset.parquet.
athena_dataset_input = {
    'InputName': 'data',
    'DatasetDefinition': {
        'AthenaDatasetDefinition': {
            'Catalog': 'AwsDataCatalog',
            'Database': 'link',
            'QueryString': 'SELECT * FROM "link"."dl_tpc_customer"',
            'WorkGroup': 'primary',
            'OutputS3Uri': 's3://sagemaker-studio-w16ttswarb/',
            'OutputFormat': 'PARQUET',
        },
        'LocalPath': '/opt/ml/processing/input/dataset.parquet',
    },
}

# Input 'code': the S3 prefix holding the processing code, mapped to
# /opt/ml/processing/input/code -- the directory the entrypoint below
# reads preprocessing.py from.
code_input = {
    'InputName': 'code',
    'AppManaged': False,
    'S3Input': {
        'S3Uri': 's3://sagemaker-studio-w16ttswarb/processingCode/',
        'LocalPath': '/opt/ml/processing/input/code',
        'S3DataType': 'S3Prefix',
        'S3InputMode': 'File',
        'S3DataDistributionType': 'FullyReplicated',
    },
}

# Single output: /opt/ml/processing/output/ is paired with the
# processingJobOutput/ prefix, using the 'EndOfJob' upload mode.
output_config = {
    'Outputs': [
        {
            'OutputName': 'OutputProcessingJobAthenaTrain',
            'S3Output': {
                'S3Uri': 's3://sagemaker-studio-w16ttswarb/processingJobOutput/',
                'LocalPath': '/opt/ml/processing/output/',
                'S3UploadMode': 'EndOfJob',
            },
        },
    ],
}

# One ml.t3.large instance with a 15 GB volume.
resources = {
    'ClusterConfig': {
        'InstanceCount': 1,
        'InstanceType': 'ml.t3.large',
        'VolumeSizeInGB': 15,
    },
}

# Container image plus the command used as its entrypoint.
app_specification = {
    'ImageUri': '683313688378.dkr.ecr.us-east-1.amazonaws.com/sagemaker-scikit-learn:0.23-1-cpu-py3',
    'ContainerEntrypoint': ['python3', '/opt/ml/processing/input/code/preprocessing.py'],
}

# NOTE(review): 'YOUR ROLE' looks like a placeholder for a real role ARN --
# confirm and substitute before running.
response = client.create_processing_job(
    ProcessingInputs=[athena_dataset_input, code_input],
    ProcessingOutputConfig=output_config,
    ProcessingJobName='processingJobAthenaBoto19',
    ProcessingResources=resources,
    AppSpecification=app_specification,
    RoleArn='YOUR ROLE',
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment