Skip to content

Instantly share code, notes, and snippets.

@gxercavins
Created December 6, 2019 18:12
Show Gist options
  • Save gxercavins/aec345291ec31cef0d5b84000ebe1ab5 to your computer and use it in GitHub Desktop.
Save gxercavins/aec345291ec31cef0d5b84000ebe1ab5 to your computer and use it in GitHub Desktop.
Stack Overflow question 59217700
import argparse, logging
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
def set_last_step(file_list):
    """Build the record that gets written to the BigQuery table.

    Args:
        file_list: The incoming pipeline element. It is accepted so the
            function can be passed to ``beam.Map``, but it is not read;
            the returned values are hard-coded.

    Returns:
        A new dict on every call, holding the ``folder`` and
        ``last_file`` fields.
    """
    return {'folder': '1950', 'last_file': 'the_last_one.txt'}
def run(argv=None):
    """Build and launch the pipeline that rewrites the BigQuery table.

    The pipeline creates a single seed element, maps it through
    ``set_last_step`` to obtain the record, and writes that record to
    ``PROJECT:DATASET.write2bq`` using the ``WRITE_TRUNCATE`` disposition.

    Args:
        argv: Optional list of command-line arguments. When ``None``,
            argparse reads them from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()
    # The parser declares no flags of its own, so every argument ends up in
    # the "unknown" list, which is handed to Beam as pipeline options.
    _, pipeline_args = parser.parse_known_args(argv)

    pipeline_options = PipelineOptions(pipeline_args)
    # NOTE(review): presumably needed so that module-level names such as
    # set_last_step are available on remote workers -- confirm against the
    # Beam SetupOptions documentation.
    pipeline_options.view_as(SetupOptions).save_main_session = True

    p = beam.Pipeline(options=pipeline_options)
    (p
     | 'Read Configuration Table ' >> beam.Create(['Start'])
     | 'Set record update' >> beam.Map(set_last_step)
     | 'Update table' >> beam.io.gcp.bigquery.WriteToBigQuery(
         table='PROJECT:DATASET.write2bq',
         write_disposition='WRITE_TRUNCATE',
         schema=('folder:STRING, last_file:STRING'),
     ))
    # The result object returned by run() is not kept or waited on here.
    p.run()
if __name__ == '__main__':
    # Set the root logger to INFO before launching the pipeline when the
    # file is executed as a script.
    logging.getLogger().setLevel(logging.INFO)
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment