Skip to content

Instantly share code, notes, and snippets.

View sandboxws's full-sized avatar
🏠
Debug → Analyze → Refactor → Performance Gains → Repeat

Ahmed Elhossaini sandboxws

🏠
Debug → Analyze → Refactor → Performance Gains → Repeat
View GitHub Profile
@sandboxws
sandboxws / get_addons_host.gql
Created March 17, 2020 17:23
GetAddOns as a host
{
"operationName": "GetAddOns",
"variables": {
"hostId": "5dd43565611ffd0029bc80b2",
"listingId": "5e6b8d8ed39f1e00505f0294",
"currency": "CAD",
"offset": 0,
"limit": 15,
"state": "active"
},
@sandboxws
sandboxws / mongoio_sink.py
Created March 9, 2018 01:45
WIP MongoDB Apache Beam Sink for Python
__all__ = ['WriteToMongo']
import json
from pymongo import MongoClient
from apache_beam.transforms import PTransform
from apache_beam.io import iobase
class _MongoSink(iobase.Sink):
"""A :class:`~apache_beam.io.iobase.Sink`."""
// Extract errors PCollectionTuple
// Pull the failed-conversion side output (tagged bqTableRowsFailedTag) out of the
// multi-output tuple produced by the ParDo, as a PCollection of error strings.
PCollection<String> bqTableRowsErrors =
bqTableRowsTuple.get(bqTableRowsFailedTag)
// NullableCoder wraps the UTF-8 string coder so that null elements can be
// encoded — presumably the DoFn may emit null on failure; confirm against
// HashMapToTableRowFn.
.setCoder(NullableCoder.of(StringUtf8Coder.of()));
// Log errors to a text file under cloud storage.
// NOTE(review): snippet is truncated here — the closing ");" of this apply()
// call is outside the visible fragment.
bqTableRowsErrors
.apply(
"Write Errors",
TextIO.write().to("gs://beam-tutorial/album_errors.txt")
// Convert each HashMap row into a BigQuery TableRow via a multi-output ParDo:
// successfully converted rows go to bqTableRowsSuccessTag (the main output),
// rows that fail conversion go to the bqTableRowsFailedTag side output.
// The resulting PCollectionTuple is unpacked by tag downstream.
PCollectionTuple bqTableRowsTuple =
rows.apply(
"HashMap to TableRow",
ParDo.of(new HashMapToTableRowFn(bqTableRowsSuccessTag, bqTableRowsFailedTag))
// Main output tag first; additional (side) output tags in the TupleTagList.
.withOutputTags(bqTableRowsSuccessTag, TupleTagList.of(bqTableRowsFailedTag))
);
// Output tags used to split the ParDo's results: successful TableRow conversions
// vs. failure messages. The trailing "{}" creates an anonymous subclass on purpose —
// it lets Beam recover the generic type argument at runtime despite erasure
// (per the TupleTag javadoc); do not "simplify" it away.
final TupleTag<TableRow> bqTableRowsSuccessTag =
new TupleTag<TableRow>() {};
final TupleTag<String> bqTableRowsFailedTag =
new TupleTag<String>() {};
package com.sandboxws.chinook;
import com.google.api.services.bigquery.model.TableRow;
import com.sandboxws.beam.AppOptions;
import com.sandboxws.beam.coders.TableRowCoder;
import com.sandboxws.chinook.bigquery.schema.AlbumTableSchema;
import java.sql.ResultSet;
import java.util.HashMap;
import java.util.Map;
gradle albums --args="--pgDatabase=chinook_development --pgUsername=root --project=GOOGLE_CLOUD_PROJECT_ID --outputTable=dwh.albums --tempLocation=gs://beam_tutorial/temp --stagingLocation=gs://beam_tutorial/staging"
// Read the albums table from PostgreSQL via JdbcIO, materializing each result-set
// row as a HashMap<String, Object> keyed by column name (delegated to
// TableRowMapper.asMap — presumably column name -> value; verify against that helper).
// NOTE(review): fragment is truncated — the mapRow method, anonymous RowMapper
// class, and the enclosing apply() are not closed within the visible lines.
PCollection<HashMap<String, Object>> rows = pipeline.apply(
"Read Albums from PG",
JdbcIO.<HashMap<String, Object>>read()
.withDataSourceConfiguration(JdbcIO.DataSourceConfiguration.create(pgDataSource))
// Coder for the HashMap elements flowing out of this source.
.withCoder(TableRowCoder.of())
// Map ResultSet row to a HashMap
.withRowMapper(new RowMapper<HashMap<String, Object>>() {
@Override
public HashMap<String, Object> mapRow(ResultSet resultSet) throws Exception {
return TableRowMapper.asMap(resultSet, tableName, pkName);
@sandboxws
sandboxws / AlbumPipeline.java
Last active December 29, 2018 06:07
Album Batch Pipeline
package com.sandboxws.chinook;
import com.google.api.services.bigquery.model.TableRow;
import com.sandboxws.beam.AppOptions;
import com.sandboxws.beam.coders.TableRowCoder;
import com.sandboxws.chinook.bigquery.schema.AlbumTableSchema;
import java.sql.ResultSet;
import java.util.HashMap;
import java.util.Map;
// Sink: stream the converted TableRows into BigQuery. Appends to the destination
// table (creating it from AlbumTableSchema if absent) and silently drops any
// fields not present in the schema.
bqTableRows.apply("Write to BigQuery",
BigQueryIO.writeTableRows()
.to(options.getOutputTable()) // Passed as an argument from the command line
.withSchema(AlbumTableSchema.schema()) // The schema for the BigQuery table
.ignoreUnknownValues() // Ignore any values passed but not defined on the table schema
.withWriteDisposition(BigQueryIO.Write.WriteDisposition.WRITE_APPEND) // Append to the BigQuery table.
.withCreateDisposition(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED) // Create the BigQuery table if it doesn't exist
);