# One-time setup for working on a GitHub fork:

# 1. Clone your fork over SSH (replace the placeholders with your username/repo).
git clone git@github.com:YOUR-USERNAME/YOUR-FORKED-REPO.git

# 2. Move into the working copy.
cd into/cloned/fork-repo

# 3. Register the original project as the "upstream" remote so you can pull its changes.
#    NOTE: the unencrypted git:// protocol was permanently disabled by GitHub in 2022,
#    so the remote must use https:// (or ssh) instead.
git remote add upstream https://github.com/ORIGINAL-DEV-USERNAME/REPO-YOU-FORKED-FROM.git

# 4. Download upstream's branches and objects without merging anything yet.
git fetch upstream
# Register an AWS Glue ETL job whose script has been uploaded to S3.
# NOTE(review): this fragment assumes `import boto3` appears earlier in the file — confirm.
glue = boto3.client('glue')

glue_job_name = 'MyDataProcessingETL'
s3_script_path = 's3://my-code-bucket/glue/glue-etl-processing.py'
my_glue_role = 'MyGlueJobRole'  # created earlier

# The original snippet was truncated after `Name=`; the remaining arguments below are a
# reconstruction from the variables this fragment defines — TODO confirm against the
# source document / Boto3 Glue.Client.create_job reference.
response = glue.create_job(
    Name=glue_job_name,
    Role=my_glue_role,
    Command={
        'Name': 'glueetl',            # standard Spark ETL job type
        'ScriptLocation': s3_script_path,
        'PythonVersion': '3',
    },
)
| input_df = spark.read.option("header", "true").csv(s3_input_data_path) | |
| rearranged_col_names_df = input_df.select(*columns) | |
| # drop null values | |
| cleaned_df = rearranged_col_names_df.dropna() | |
| print("Dropped null values") | |
| # split dataframe into train and validation | |
| splits = cleaned_df.randomSplit([0.7, 0.3], 0) |
# Copyright (C) 2016 Martina Pugliese

# NOTE: this fragment was wrapped in markdown-table pipe residue (`| ... | |`) that made it
# syntactically invalid; below is the same code with the garbling removed.
from boto3 import resource
from boto3.dynamodb.conditions import Key

# The boto3 dynamoDB resource (uses default credentials/region from the environment).
dynamodb_resource = resource('dynamodb')
| def get_table_metadata(table_name): |