Jason Tang jastang

## iam-auth-role.json
{
   "Version": "2012-10-17",
   "Statement": [
      {
         "Effect": "Allow",
         "Action": [
             "rds-db:connect"
         ],
         "Resource": [
             "arn:aws:rds-db:your-region:XXXXXXXXXX:dbuser:*/benchsci"

## trust-policy.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "AWS": [
          "arn:aws:iam::XXXXXXXXX:role/your-lambda-function-role"
        ],
        "Service": "lambda.amazonaws.com"

## serverless.yml
# Provider-level settings for the serverless.yml excerpt.
provider:
  name: aws
  runtime: python3.7
  # The stage is fixed to 'production' in this file.
  stage: 'production'
  region: us-east-1
  # IAM policy statements; this excerpt shows only the Effect and Action keys.
  iamRoleStatements:
    - Effect: "Allow"
      Action:
        # NOTE(review): both entries are wildcards, i.e. every RDS and every
        # STS action is allowed. Consider narrowing to the actions actually used.
        - "rds:*"
        - "sts:*"

## create_iam_user.sql
-- Create the benchsci login role (no password is set here).
CREATE USER benchsci WITH LOGIN;
-- Make benchsci a member of rds_iam; presumably this is what enables IAM
-- database authentication for the user (confirm against the RDS docs).
GRANT rds_iam TO benchsci;
-- Optional: let benchsci use and read every sequence in a schema.
-- Replace <schema> with the real schema name before running.
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA <schema> TO benchsci;

## update_metastore.py
"""
Kick off the crawler for this client unless it is busy.
"""
# The crawler is named "<client>-<DATA_FREQUENCY>"; build the name once.
crawler_name = '%s-%s' % (client, DATA_FREQUENCY)
crawler = glue.get_crawler(Name=crawler_name)

if crawler['Crawler']['State'] != 'READY':
    # Any state other than READY means we must not start it again.
    print('Crawler is not in a READY state or already running! Checking the catalog instead.')
else:
    response = glue.start_crawler(Name=crawler_name)

## metadata_validator.py
def lambda_handler(event, context):
    """Load the validation configuration for each S3 record in the event.

    Args:
        event: Mapping with a ``Records`` list; each record carries the
            bucket name and object key under its ``s3`` entry.
        context: Lambda context object; not used in this excerpt.
    """
    for record in event['Records']:
        s3_info = record['s3']
        b = s3_info['bucket']['name']
        s3_file = s3_info['object']['key']

        # Fetch the client's metadata validation configuration from the same
        # bucket and parse it with key order preserved.
        raw_conf = s3.meta.client.get_object(Bucket=b, Key=VALIDATION_CONFIG)['Body'].read()
        conf = json.loads(raw_conf, object_pairs_hook=OrderedDict)

## get_s3_chunk.py
def lambda_handler(event, context):
    """Unpack the byte range and object location from the invocation event.

    Args:
        event: Mapping with ``start_byte``, ``end_byte``, ``bucket`` and
            ``key`` entries, or ``None``.
        context: Lambda context object; not used in this excerpt.

    Returns:
        None. When ``event`` is ``None`` a message is printed and the
        handler exits early.
    """
    if event is None:
        print("Couldn't find the object or byte range!")
        return

    # The chunking function supplies the byte range and where the object lives.
    start_byte, end_byte = event['start_byte'], event['end_byte']
    bucket, key = event['bucket'], event['key']

## chunk_s3.py
while end_byte <= objectsize:

            # Ensure the end_byte is a carriage return, so the line splits work.
            end_byte = scan_to_eol(end_byte, objectsize, bucket, key)

            # Invoke the cleaning function
            ctx = {
                "start_byte": start_byte,
                "end_byte": end_byte,
                "bucket": bucket,

## replicate_s3.py
def lambda_handler(event, context):
    """Read the source and destination locations from the invocation event.

    Args:
        event: Mapping with ``Src_Bucket``, ``Src_Key``, ``Dest_Bucket`` and
            ``Dest_Key`` entries.
        context: Lambda context object; not used in this excerpt.
    """
    src_bucket, src_key = event['Src_Bucket'], event['Src_Key']
    dest_bucket, dest_key = event['Dest_Bucket'], event['Dest_Key']

    # Bucket/Key pair describing the source object (presumably the copy source).
    src = dict(Bucket=src_bucket, Key=src_key)

## read_json_s3.py
s3 = boto3.resource('s3')

# Download my-config.json from bucket `b` and parse it with key order preserved.
raw_conf = s3.meta.client.get_object(Bucket=b, Key='my-config.json')['Body'].read()
conf = json.loads(raw_conf, object_pairs_hook=OrderedDict)

bucket = s3.Bucket(name=b)
# Listing under the 'sftp' prefix yields object metadata only, not contents.
src = bucket.objects.filter(Prefix='sftp')
# With N mandatory files configured, take the N most recently modified uploads
# and ask whether they are representative of the mandatory files.
newest_first = sorted(src, key=lambda obj: obj.last_modified, reverse=True)
mandatory_objects = [o.key for o in newest_first[:len(conf['mandatory_files'])]]
# mandatory_objects now holds the keys to validate however you need.
	{
	"Version": "2012-10-17",
	"Statement": [
	{
	"Effect": "Allow",
	"Action": [
	"rds-db:connect"
	],
	"Resource": [
	"arn:aws:rds-db:your-region:XXXXXXXXXX:dbuser:*/benchsci"
	provider:
	name: aws
	runtime: python3.7
	stage: 'production'
	region: us-east-1
	iamRoleStatements:
	- Effect: "Allow"
	Action:
	- "rds:*"
	- "sts:*"
	CREATE USER benchsci WITH LOGIN;
	GRANT rds_iam TO benchsci;
	-- optional
	GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA <schema> TO benchsci;
	"""
	First, start the crawler if it is not already running.
	"""
	crawler = glue.get_crawler(Name='%s-%s' % (client, DATA_FREQUENCY))

	if crawler['Crawler']['State'] == 'READY':
	response = glue.start_crawler(Name='%s-%s' % (client, DATA_FREQUENCY))
	else:
	print('Crawler is not in a READY state or already running! Checking the catalog instead.')
	def lambda_handler(event, context):
	records = event['Records']

	for record in records:
	b = record['s3']['bucket']['name']
	s3_file = record['s3']['object']['key']

	# Load the metadata validation configuration for this client
	conf = json.loads(s3.meta.client.get_object(Bucket=b, Key=VALIDATION_CONFIG)['Body'].read(),
	object_pairs_hook=OrderedDict)
	def lambda_handler(event, context):
	if event is None:
	print("Couldn't find the object or byte range!")
	return

	# The byte range is passed from the Chunking function
	start_byte = event['start_byte']
	end_byte = event['end_byte']
	bucket = event['bucket']
	key = event['key']
	while end_byte <= objectsize:

	# Ensure the end_byte is a carriage return, so the line splits work.
	end_byte = scan_to_eol(end_byte, objectsize, bucket, key)

	# Invoke the cleaning function
	ctx = {
	"start_byte": start_byte,
	"end_byte": end_byte,
	"bucket": bucket,
	def lambda_handler(event, context):
	sb = event['Src_Bucket']
	sk = event['Src_Key']
	db = event['Dest_Bucket']
	dk = event['Dest_Key']

	src = {
	'Bucket': sb,
	'Key': sk
	}
	s3 = boto3.resource('s3')

	conf = json.loads(s3.meta.client.get_object(Bucket=b, Key='my-config.json')['Body'].read(), object_pairs_hook=OrderedDict)

	bucket = s3.Bucket(name=b)
	# This is just metadata
	src = bucket.objects.filter(Prefix='sftp')
	# If we have N mandatory files, are most recent N uploads representative of the mandatory files?
	mandatory_objects = [o.key for o in sorted(src, key=lambda x: x.last_modified, reverse=True)][:len(conf['mandatory_files'])]
	# You can now validate whatever you want in mandatory_objects