Last active
March 19, 2021 16:17
-
-
Save garystafford/d2066c70eb4288c22ca6397af41d8f0d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Purpose: Create Daily Macie classification job - Synthea patient data
# Author: Gary A. Stafford (March 2021)
import logging | |
import sys | |
import boto3 | |
from botocore.exceptions import ClientError | |
# Configure the root logger: INFO level, timestamped "[time] LEVEL - message" lines.
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s - %(message)s',
)

# AWS service clients, created once at import time and shared by the functions in this script.
ssm_client = boto3.client('ssm')       # Systems Manager (Parameter Store lookups)
sts_client = boto3.client('sts')       # Security Token Service (caller identity)
macie_client = boto3.client('macie2')  # Amazon Macie
def main():
    """Gather the inputs for the Macie job and create it.

    Reads the script parameters, looks up the caller's AWS account id via STS,
    collects the custom data identifier ids, and passes all three to
    ``create_classification_job``.
    """
    settings = get_parameters()
    caller_identity = sts_client.get_caller_identity()
    identifier_ids = list_custom_data_identifiers()

    create_classification_job(
        patient_data_bucket=settings['patient_data_bucket'],
        account_id=caller_identity['Account'],
        custom_data_identifiers=identifier_ids,
    )
def list_custom_data_identifiers():
    """Return the ids of all custom data identifiers known to Macie.

    The ListCustomDataIdentifiers API is paginated, so this follows
    ``nextToken`` until the service stops returning one. Reading only the
    first response would silently drop identifiers beyond the first page.

    Returns:
        list: The ``id`` of every custom data identifier, in the order the
        service returned them. Empty when none are defined.

    On a ``ClientError`` the error is logged and the process exits via
    ``sys.exit``.
    """
    custom_data_identifiers = []
    request_kwargs = {}  # gains 'nextToken' once a further page is available

    try:
        while True:
            response = macie_client.list_custom_data_identifiers(**request_kwargs)
            custom_data_identifiers.extend(
                item['id'] for item in response.get('items', [])
            )

            next_token = response.get('nextToken')
            if not next_token:
                # No token (missing, None or empty) means this was the last page.
                break
            request_kwargs['nextToken'] = next_token
    except ClientError as e:
        logging.error(e)
        sys.exit(e)

    return custom_data_identifiers
def create_classification_job(patient_data_bucket, account_id, custom_data_identifiers):
    """Create the daily, scheduled Macie classification job for the patient data bucket.

    Args:
        patient_data_bucket: Name of the S3 bucket the job should analyze.
        account_id: AWS account id that owns the bucket.
        custom_data_identifiers: List of custom data identifier ids passed to
            the job as ``customDataIdentifierIds``.

    The job is named ``SyntheaPatientData_Daily``, is of type ``SCHEDULED``
    with a daily schedule, and is scoped to objects whose extension equals
    ``csv``. The service response is logged at DEBUG level.

    On a ``ClientError`` the error is logged and the process exits via
    ``sys.exit``.
    """
    # Bucket to scan, paired with the account that owns it.
    bucket_definitions = [
        {
            'accountId': account_id,
            'buckets': [patient_data_bucket],
        },
    ]

    # Inclusion rule: object extension must equal 'csv'.
    csv_extension_term = {
        'simpleScopeTerm': {
            'comparator': 'EQ',
            'key': 'OBJECT_EXTENSION',
            'values': ['csv'],
        },
    }

    s3_job_definition = {
        'bucketDefinitions': bucket_definitions,
        'scoping': {
            'includes': {
                'and': [csv_extension_term],
            },
        },
    }

    try:
        job_response = macie_client.create_classification_job(
            customDataIdentifierIds=custom_data_identifiers,
            description='Review Synthea patient data (Daily)',
            jobType='SCHEDULED',
            initialRun=True,
            name='SyntheaPatientData_Daily',
            s3JobDefinition=s3_job_definition,
            samplingPercentage=100,
            scheduleFrequency={'dailySchedule': {}},
            tags={'Project': 'Amazon Macie Demo'},
        )
    except ClientError as e:
        logging.error(e)
        sys.exit(e)
    else:
        logging.debug(f'Response: {job_response}')
def get_parameters():
    """Read the script's settings from AWS Systems Manager (SSM) Parameter Store.

    Returns:
        dict: A single entry, ``'patient_data_bucket'``, holding the value of
        the ``/macie_demo/patient_data_bucket`` parameter.
    """
    bucket_parameter = ssm_client.get_parameter(Name='/macie_demo/patient_data_bucket')
    bucket_name = bucket_parameter['Parameter']['Value']
    return {'patient_data_bucket': bucket_name}
# Run the job creation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment