Skip to content

Instantly share code, notes, and snippets.

@gxercavins
gxercavins / BeamSQL.java
Created April 30, 2019 21:41
StackOverflow question 55851708
package org.apache.beam.examples;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
@gxercavins
gxercavins / one_window_one_file.py
Last active December 1, 2020 23:22
Stackoverflow question 56234318
import argparse, json, logging, time
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.io import filesystems
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
class AddWindowingInfoFn(beam.DoFn):
@gxercavins
gxercavins / read.py
Created May 24, 2019 18:36
Stackoverflow question 56295585
import argparse
import logging
import re
import apache_beam as beam
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
# input file pattern will be a template parameter
@gxercavins
gxercavins / ChronologicalOrder.java
Created June 5, 2019 20:24
StackOverflow question 56454374
package com.dataflow.samples;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.ZonedDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
@gxercavins
gxercavins / map_vs_pardo.py
Created June 8, 2019 08:13
SO question 56502093
import argparse, logging
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
def compute_interest_map(data_item):
    """Return ``data_item`` incremented by one.

    The surrounding snippet imports ``apache_beam``; this is presumably the
    callable handed to a per-element ``Map`` step, but the call site is not
    visible here -- confirm against the pipeline definition.

    Args:
        data_item: Any value that supports ``+ 1`` (the code does not
            restrict the type further).

    Returns:
        The result of ``data_item + 1``.
    """
    # The body must be indented under the ``def``; with the ``return`` at
    # column 0 the module fails to parse (IndentationError).
    return data_item + 1
@gxercavins
gxercavins / StopSession.java
Created June 9, 2019 09:14
StackOverflow question 56465103
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
@gxercavins
gxercavins / DynamicTableFromKey.java
Created June 13, 2019 16:54
SO question 56545560
package com.dataflow.samples;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.DynamicDestinations;
import org.apache.beam.sdk.io.gcp.bigquery.TableDestination;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
@gxercavins
gxercavins / generate_messages.py
Last active June 16, 2019 14:48
SO question 56616576
import time
from random import choice, random
from google.cloud import pubsub_v1
# replace these variables
project_id = "PROJECT_ID"
topic_name = "TOPIC_NAME"
publisher = pubsub_v1.PublisherClient()
@gxercavins
gxercavins / SampleTextIO.java
Created June 23, 2019 18:09
SO question 56720785
package com.dataflow.samples;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
@gxercavins
gxercavins / results.txt
Created July 6, 2019 11:09
SO question 56913056
yield element
INFO:root:Event: {'score': 10, 'team': 'red'}
INFO:root:Event: {'score': 10, 'team': 'red'}
INFO:root:Event: {'score': 8, 'team': 'blue'}
INFO:root:Event: {'score': 8, 'team': 'blue'}
return element
INFO:root:Event: {'score': 10, 'team': 'red'}
INFO:root:Event: score
INFO:root:Event: team