Skip to content

Instantly share code, notes, and snippets.

@brunoripa
Created January 29, 2018 16:34
Show Gist options
  • Save brunoripa/07038d9dce842014b941661f84651410 to your computer and use it in GitHub Desktop.
Save brunoripa/07038d9dce842014b941661f84651410 to your computer and use it in GitHub Desktop.
# Build the pipeline inside a context manager; Beam runs it when the
# ``with`` block exits.
with apache_beam.Pipeline(options=options) as p:
    # Read "input.txt" line by line and pass every line through Split
    # (a DoFn defined elsewhere -- presumably it parses a line into a
    # record; confirm against its definition).
    rows = (
        p
        | ReadFromText("input.txt")
        | apache_beam.ParDo(Split())
    )

    # Branch 1: CollectTimings is expected to emit (key, value) pairs
    # (TODO confirm); values are grouped per key and reduced to their mean.
    timings = (
        rows
        | apache_beam.ParDo(CollectTimings())
        | "Grouping timings" >> apache_beam.GroupByKey()
        | "Calculating average" >> apache_beam.CombineValues(
            apache_beam.combiners.MeanCombineFn()
        )
    )

    # Branch 2: same shape as above, but the grouped values are counted
    # per key instead of averaged.
    users = (
        rows
        | apache_beam.ParDo(CollectUsers())
        | "Grouping users" >> apache_beam.GroupByKey()
        | "Counting users" >> apache_beam.CombineValues(
            apache_beam.combiners.CountCombineFn()
        )
    )

    # Join both branches on their shared key; each joined element carries
    # the results under the 'timings' and 'users' tags. WriteToCSV (defined
    # elsewhere) formats each joined element before it is written out with
    # the "output.txt" file name prefix.
    to_be_joined = (
        {
            'timings': timings,
            'users': users,
        }
        | apache_beam.CoGroupByKey()
        | apache_beam.ParDo(WriteToCSV())
        | WriteToText("output.txt")
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment