Skip to content

Instantly share code, notes, and snippets.

@gxercavins
gxercavins / queries.sql
Created February 16, 2020 10:28
SO question 60246807
-- Create table t1 in the `overwrite` dataset: an INT64 `sales` column and a
-- DATE `sdate` column, with the table partitioned by `sdate`.
CREATE TABLE overwrite.t1
(
  sales INT64,
  sdate DATE
)
PARTITION BY sdate;
-- create t2
CREATE TABLE
overwrite.t2 (sales INT64,
@gxercavins
gxercavins / pd-concat.py
Created February 8, 2020 14:53
SO question 60080589
import argparse, logging
import pandas as pd
from random import choice
import apache_beam as beam
from apache_beam.options.pipeline_options import SetupOptions
import apache_beam.transforms.combiners as combine
import apache_beam.pvalue as pvalue
@gxercavins
gxercavins / BigQueryUpsert.java
Created February 8, 2020 12:07
SO question 60070098
package org.apache.beam.examples;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.cloud.bigquery.BigQuery;
@gxercavins
gxercavins / AllSideOutputs.java
Created February 1, 2020 21:51
SO question 60011995
package org.apache.beam.examples;
import java.util.Arrays;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
@gxercavins
gxercavins / mp3.py
Created January 21, 2020 10:15
SO question 59827321
#!/usr/bin/env python
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
@gxercavins
gxercavins / mp3.py
Created January 21, 2020 10:15
SO question 59827321
#!/usr/bin/env python
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
@gxercavins
gxercavins / script.py
Created January 12, 2020 13:53
SO question 59697878
import argparse
import logging
import apache_beam as beam
from apache_beam.options.pipeline_options import SetupOptions
from apache_beam.options.pipeline_options import PipelineOptions
def prediction_handler(element):
@gxercavins
gxercavins / exists.sql
Created January 12, 2020 12:02
SO question 59702176
WITH
authors AS (
SELECT
author,
DATE_TRUNC(DATE(time_ts), MONTH) AS month
FROM
`bigquery-public-data.hacker_news.stories`
WHERE
author IS NOT NULL
GROUP BY 1,2)
@gxercavins
gxercavins / credentials-in-side-input.py
Created January 5, 2020 14:35
SO question 59557617
import argparse, json, logging
import datetime
import apache_beam as beam
import apache_beam.pvalue as pvalue
from apache_beam.io import ReadFromText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
@gxercavins
gxercavins / schema-in-side-input.py
Created December 27, 2019 18:55
SO question 59458599
import argparse, json, logging
import apache_beam as beam
import apache_beam.pvalue as pvalue
from apache_beam.io import ReadFromText
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.options.pipeline_options import SetupOptions
class EnrichElementsFn(beam.DoFn):