Skip to content

Instantly share code, notes, and snippets.

@gxercavins
gxercavins / run_on_gcp.sh
Created February 8, 2019 20:30
Test for SO question 53404579
#!/bin/bash
# Validate the command line: exactly two arguments are required, the project ID
# and the GCS bucket name (without the gs:// prefix).
if [ "$#" -ne 2 ]; then
  # Usage text is diagnostic output, so send it to stderr rather than stdout.
  echo "Please specify Project ID and GCS Bucket Name (without gs:// prefix)" >&2
  echo "Usage: ./run_on_gcp.sh project-id bucket-name" >&2
  # A bare `exit` reuses the status of the last command (the echo, i.e. 0),
  # which would report success on a usage error; return non-zero explicitly.
  exit 1
fi
# Export both values so commands started after this point can read them.
export PROJECT="$1"
export BUCKET="$2"
@gxercavins
gxercavins / NonStandardDelimiters.java
Created February 17, 2019 21:35
Test for SO question 54638963
package com.dataflow.samples;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.Method;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.Default;
package com.dataflow.samples;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.beam.sdk.Pipeline;
import argparse
import logging
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
from apache_beam.options.pipeline_options import StandardOptions
# Window length in seconds, going by the name.
# NOTE(review): the code that consumes this constant is outside this excerpt — confirm.
WINDOW_SECONDS = 10
package com.dataflow.samples;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.joda.time.Duration;
import org.joda.time.Instant;
import java.util.concurrent.TimeUnit;
import java.util.Date;
import java.sql.Timestamp;
import org.apache.beam.sdk.Pipeline;
@gxercavins
gxercavins / gbk_sessions.py
Created April 1, 2019 12:03
SO question 55219481: use session windows with Python SDK
import argparse, json, logging, time
import apache_beam as beam
import apache_beam.transforms.window as window
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
class DebugPrinter(beam.DoFn):
apache-beam[gcp]==2.9.0
inflect
@gxercavins
gxercavins / DataflowSHA256.java
Last active April 21, 2019 20:16
SO question 55784467
package com.dataflow.samples;
import java.io.IOException;
import java.security.MessageDigest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.KV;
@gxercavins
gxercavins / BeamSQL.java
Created April 30, 2019 21:41
Stack Overflow question 55851708
package org.apache.beam.examples;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.extensions.sql.SqlTransform;
@gxercavins
gxercavins / read.py
Created May 24, 2019 18:36
Stack Overflow question 56295585
import argparse
import logging
import re
import apache_beam as beam
from apache_beam.io import WriteToText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
# input file pattern will be a template parameter