Skip to content

Instantly share code, notes, and snippets.

@mshakhomirov
Created October 20, 2019 18:01
Show Gist options
  • Save mshakhomirov/477cc42cc3525bc47713c8bce06642eb to your computer and use it in GitHub Desktop.
Save mshakhomirov/477cc42cc3525bc47713c8bce06642eb to your computer and use it in GitHub Desktop.
def _load_table_as_df_normalized(bucket_name, file_name, tableSchema, tableName):
    """
    Load a JSON file from Cloud Storage into BigQuery, one row per address.

    Source data file must be outer JSON.

    The file is downloaded from ``bucket_name``/``file_name``, parsed as
    JSON and flattened so that every entry under ``addresses`` becomes one
    row carrying the parent record's ``id``, ``first_name``, ``last_name``
    and ``dob``. The resulting frame is loaded into ``tableName`` inside
    the ``BQ_DATASET`` dataset and the function waits for the job to finish.

    Args:
        bucket_name: Name of the bucket holding the source file.
        file_name: Name of the blob inside the bucket.
        tableSchema: Schema definition handed to ``create_schema_from_yaml``.
        tableName: Name of the destination BigQuery table.
    """
    # Fetch and parse the source document.
    blob = CS.get_bucket(bucket_name).blob(file_name)
    body = json.loads(blob.download_as_string())

    table_id = BQ.dataset(BQ_DATASET).table(tableName)

    # NOTE(review): ``job_config`` is not defined in this function; it is
    # presumably a module-level load-job config shared with other loaders,
    # so assigning the schema here mutates that shared object -- confirm.
    job_config.schema = create_schema_from_yaml(tableSchema)

    # ``pandas.io.json.json_normalize`` has been deprecated since pandas 1.0;
    # prefer the top-level function and fall back only on older releases.
    json_normalize = getattr(pandas, "json_normalize", None)
    if json_normalize is None:
        json_normalize = pandas.io.json.json_normalize

    df = json_normalize(
        data=body,
        record_path='addresses',
        meta=['id', 'first_name', 'last_name', 'dob'],
        record_prefix='addresses_',
        errors='ignore',
    )

    # Keep only the expected columns, in this fixed order.
    columns = [
        'id',
        'first_name',
        'last_name',
        'dob',
        'addresses_status',
        'addresses_address',
        'addresses_city',
        'addresses_state',
        'addresses_zip',
        'addresses_numberOfYears',
    ]
    df = df[columns]

    load_job = BQ.load_table_from_dataframe(
        df,
        table_id,
        job_config=job_config,
    )
    load_job.result()  # Block until the load job completes.
    print("Job finished.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment