This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime, re, time | |
from airflow import models | |
from airflow.contrib.operators.dataflow_operator import DataFlowPythonOperator, GoogleCloudBucketHelper | |
from airflow.contrib.hooks.gcp_dataflow_hook import DataFlowHook | |
from airflow.models import BaseOperator | |
from typing import Dict, List | |
JOB_NAME='dataflow-python3' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.dataflow.samples; | |
import java.util.ArrayList; | |
import java.util.List; | |
import com.google.api.gax.paging.Page; | |
import com.google.cloud.storage.Blob; | |
import com.google.cloud.storage.Bucket; | |
import com.google.cloud.storage.BucketInfo; | |
import com.google.cloud.storage.Storage; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.apache.beam.examples; | |
import org.apache.beam.sdk.coders.AvroCoder; | |
import org.apache.beam.sdk.coders.DefaultCoder; | |
import org.apache.beam.sdk.options.PipelineOptions; | |
import org.apache.beam.sdk.options.PipelineOptionsFactory; | |
import org.apache.beam.sdk.Pipeline; | |
import org.apache.beam.sdk.transforms.Create; | |
import org.apache.beam.sdk.transforms.Combine; | |
import org.apache.beam.sdk.transforms.Combine.CombineFn; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (C) 2018 Google Inc. | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); you may not | |
* use this file except in compliance with the License. You may obtain a copy of | |
* the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, json, logging | |
import apache_beam as beam | |
from apache_beam.io.gcp import bigquery | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.options.pipeline_options import SetupOptions | |
PROJECT="PROJECT_ID" | |
BQ_DATASET="DATASET_NAME" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import apache_beam as beam | |
import logging | |
from google.cloud import language | |
from google.cloud.language import enums | |
from google.cloud.language import types | |
PROJECT = 'PROJECT_ID' | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, json, logging | |
import apache_beam as beam | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.options.pipeline_options import SetupOptions | |
class ExtractFn(beam.DoFn): | |
def process(self, element): | |
file_name = 'gs://' + "/".join(element['id'].split("/")[:-1]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse, datetime, logging | |
import apache_beam as beam | |
from apache_beam.options.pipeline_options import PipelineOptions | |
from apache_beam.options.pipeline_options import SetupOptions | |
class GetTimestampFn(beam.DoFn): | |
"""Prints element timestamp""" | |
def process(self, element, timestamp=beam.DoFn.TimestampParam): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import com.google.cloud.bigquery.BigQuery; | |
import com.google.cloud.bigquery.BigQueryOptions; | |
import com.google.cloud.bigquery.FieldValueList; | |
import com.google.cloud.bigquery.Job; | |
import com.google.cloud.bigquery.JobId; | |
import com.google.cloud.bigquery.JobInfo; | |
import com.google.cloud.bigquery.JobStatistics; | |
import com.google.cloud.bigquery.JobStatistics.QueryStatistics; | |
import com.google.cloud.bigquery.QueryJobConfiguration; | |
import com.google.cloud.bigquery.QueryResponse; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.dataflow.samples; | |
import org.apache.beam.sdk.io.gcp.pubsub.PubsubIO; | |
import org.apache.beam.sdk.options.Description; | |
import org.apache.beam.sdk.options.PipelineOptions; | |
import org.apache.beam.sdk.options.PipelineOptionsFactory; | |
import org.apache.beam.sdk.Pipeline; | |
import org.apache.beam.sdk.transforms.DoFn; | |
import org.apache.beam.sdk.transforms.ParDo; | |
import org.apache.beam.sdk.values.PCollection; |