This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import apache_beam as beam | |
import apache_beam.transforms.window as window | |
from apache_beam.transforms import trigger | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.options.pipeline_options import SetupOptions | |
from apache_beam.options.pipeline_options import StandardOptions | |
def load_my_conversion_data(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, json, logging | |
import apache_beam as beam | |
from apache_beam.io import fileio | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.options.pipeline_options import SetupOptions | |
class JsonSink(fileio.TextSink): | |
def write(self, record): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.dataflow.samples; | |
import org.slf4j.Logger; | |
import org.slf4j.LoggerFactory; | |
import org.joda.time.Duration; | |
import org.joda.time.Instant; | |
import java.util.concurrent.TimeUnit; | |
import java.util.Date; | |
import java.sql.Timestamp; | |
import org.apache.beam.sdk.Pipeline; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, logging | |
import apache_beam as beam | |
import apache_beam.transforms.window as window | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.options.pipeline_options import SetupOptions | |
def set_last_step(file_list): | |
dic = {'folder': '1950', 'last_file': 'the_last_one.txt'} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, json, logging, time | |
import apache_beam as beam | |
import apache_beam.transforms.window as window | |
from apache_beam.io import WriteToText | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.options.pipeline_options import SetupOptions | |
class DebugPrinterFn(beam.DoFn): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, logging | |
import apache_beam as beam | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.io import ReadFromText | |
class ParseHeadersFn(beam.DoFn): | |
"""ParDo to output only the headers""" | |
def process(self, element): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.dataflow.samples; | |
import static com.google.common.base.MoreObjects.firstNonNull; | |
import com.google.common.collect.Iterables; | |
import com.google.gson.Gson; | |
import org.apache.beam.sdk.coders.VarIntCoder; | |
import org.apache.beam.sdk.Pipeline; | |
import org.apache.beam.sdk.PipelineResult; | |
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import apache_beam as beam | |
PROJECT = "PROJECT_ID" | |
BUCKET = "BUCKET_NAME" | |
schema = "index:INTEGER,event:STRING" | |
FIELD_NAMES = ["index","event"] | |
class CsvToDictFn(beam.DoFn): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.dataflow.samples; | |
import static com.google.common.base.MoreObjects.firstNonNull; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.HashMap; | |
import java.util.List; | |
import java.util.Map; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.dataflow.samples; | |
import java.util.Arrays; | |
import java.util.List; | |
import org.apache.beam.sdk.Pipeline; | |
import org.apache.beam.sdk.coders.AvroCoder; | |
import org.apache.beam.sdk.coders.DefaultCoder; | |
import org.apache.beam.sdk.coders.StringUtf8Coder; | |
import org.apache.beam.sdk.options.PipelineOptions; |