@mshakhomirov
Created October 20, 2019 17:53
import io
import json

from google.cloud import bigquery
from google.cloud import storage

# Assumed module-level clients and config; in the original project these are
# created elsewhere (e.g. at the top of the Cloud Function source).
CS = storage.Client()
BQ = bigquery.Client()
BQ_DATASET = 'your_dataset'  # replace with your BigQuery dataset name


def _load_table_as_src(bucket_name, file_name, tableSchema, tableName):
    # ! source file must be an outer-array JSON
    # ! this works for CSV where each row is a JSON string --> SRC column (Snowflake-like)
    blob = CS.get_bucket(bucket_name).blob(file_name)
    body = json.loads(blob.download_as_string())
    table_id = BQ.dataset(BQ_DATASET).table(tableName)

    schema = create_schema_from_yaml(tableSchema)
    job_config = bigquery.LoadJobConfig()
    job_config.schema = schema
    job_config.source_format = bigquery.SourceFormat.CSV
    # Pick a field delimiter that doesn't exist in your data file:
    job_config.field_delimiter = ";"
    # ';' works here because the snippet data does not contain ';'
    job_config.write_disposition = 'WRITE_APPEND'

    data_str = u"\n".join(json.dumps(item) for item in body)
    print('data_str :', data_str)
    data_file = io.BytesIO(data_str.encode())
    print('data_file :', data_file)

    load_job = BQ.load_table_from_file(
        data_file,
        table_id,
        job_config=job_config,
    )
    load_job.result()
    print("Job finished.")