Arnold1

## stream.py
import io
import socket
import struct
import time
import picamera
import argparse
import logging

########################################################################################################################
# Root-logger setup: INFO level, records rendered as
# "<timestamp> - <logger name> - <message>", with the timestamp formatted as
# month/day/year and a 12-hour clock plus AM/PM marker.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO,
)

## Dockerfile-fontenvoy
# Front-proxy Envoy image: the stock Envoy image plus curl.
# NOTE(review): `latest` is a moving tag -- consider pinning a specific Envoy
# version so that builds are reproducible.
FROM envoyproxy/envoy:latest

# Update, install and clean up within a single layer so the apt package lists
# are not baked into the final image.
RUN apt-get update && apt-get -q install -y \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Exec form: envoy is started directly instead of through `/bin/sh -c`, so
# signals sent by `docker stop` reach the envoy process itself.
# /etc/front-envoy.yaml is not added by this file; presumably it is mounted or
# copied in elsewhere -- verify against the compose/run configuration.
CMD ["/usr/local/bin/envoy", "-c", "/etc/front-envoy.yaml", "--service-cluster", "front-proxy"]

## go
package main

import (
 "bytes"
 "fmt"

 "github.com/aws/aws-sdk-go/aws"
 "github.com/aws/aws-sdk-go/aws/session"
 "github.com/aws/aws-sdk-go/service/s3"
)

## gist:44a5345ab0cc7021f9e0f59dc5ff172f
==================
==================
WARNING: DATA RACE
Read at 0x00c4200259b8 by goroutine 14:
  runtime.slicecopy()
      /usr/local/Cellar/go/1.10.2/libexec/src/runtime/slice.go:192 +0x0
  github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.copyArgs()
      /Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/args.go:320 +0x27a
  github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.(*RequestHeader).CopyTo()
      /Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/header.go:703 +0x7e0

## gist:769555c0c712a63ddb7bc2adfe307faa
https://stackoverflow.com/questions/33878433/spark-write-avro-file
http://www.bigdatatidbits.cc/2015/01/how-to-load-some-avro-data-into-spark.html
https://stackoverflow.com/questions/33899417/avro-schema-to-spark-structtype/
https://stackoverflow.com/questions/36078420/spark-avro-to-parquet
https://github.com/tomwhite/hadoop-book/blob/master/ch19-spark/src/test/scala/RDDCreationTest.scala
https://gist.github.com/MLnick/5864741781b9340cb211
http://alvincjin.blogspot.com/2015/11/append-spark-dataframe-with-new-column.html
https://stackoverflow.com/questions/27033823/how-to-overwrite-the-output-directory-in-spark
https://gist.github.com/yzhong52/f81e929e5810271292bd08856e2f4512
https://stackoverflow.com/questions/41567859/extract-a-column-value-from-a-spark-dataframe-and-add-it-to-another-dataframe

## main.scala
// reference: https://stackoverflow.com/questions/36795680/copy-schema-from-one-dataframe-to-another-dataframe?rq=1

/** Sample record used to build the source DataFrame.
  *
  * @param Dummy     placeholder field; absent from `personSchema`
  * @param Name      person's name
  * @param Timestamp timestamp, held as a string
  * @param Age       age as an integer
  */
case class Person(Dummy: String, Name: String, Timestamp: String, Age: Int)

// Two sample people, distributed as an RDD and converted to a DataFrame whose
// columns come from Person's fields.
val personDF = spark.sparkContext
  .parallelize(
    Seq(
      Person("dummy", "Ray", "12345", 23),
      Person("dummy", "John", "12345", 44)
    )
  )
  .toDF()

// Target schema: only the Name and Age columns, both nullable.
val personSchema = StructType(
  Seq(
    StructField("Name", StringType, nullable = true),
    StructField("Age", IntegerType, nullable = true)
  )
)

// Starts out as an empty RDD of Rows.
// NOTE(review): declared as `var`, presumably because it is reassigned further
// down in the full script -- confirm before changing it to `val`.
var dataRDD = spark.sparkContext.emptyRDD[Row]

## main.sql
-- Distinct mm_id count per mm_date for data1 rows that have a matching data2 row
-- whose type is neither 'test1' nor 'test2'.
-- mm_id exists in both joined tables, so every reference to it must be
-- table-qualified; the unqualified COUNT(DISTINCT mm_id) is rejected by Hive with
-- "SemanticException Column mm_id Found in more than One Tables/Subqueries".
-- mm_date is selected next to an aggregate, so the query also needs to group on it.
-- NOTE(review): mm_date is left unqualified on the assumption that only one of
-- the two tables has that column -- qualify it as well if both do.
select mm_date, COUNT(DISTINCT data1.mm_id)
from data1 join data2 on (data1.mm_id = data2.mm_id)
where mm_date between '2018-01-22' and '2018-01-22'
and data2.type <> 'test1' and data2.type <> 'test2'
group by mm_date


FAILED: SemanticException Column mm_id Found in more than One Tables/Subqueries

## gist:9ec8de27d0918f8a58f3b9c456714594
-- Row count in `data` per (im_date, im_id), restricted to im_id 12345 and to
-- the single day 2018-01-22 (the BETWEEN bounds are identical).
SELECT
    im_date,
    im_id,
    COUNT(*)
FROM data
WHERE im_id = 12345
  AND im_date BETWEEN '2018-01-22' AND '2018-01-22'
GROUP BY
    im_date,
    im_id

## KafkaSparkPopularHashTags.scala
import java.util.HashMap

import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord }
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.SparkConf

## SparkPopularHashTags.scala
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.flume._

/**
	import io
	import socket
	import struct
	import time
	import picamera
	import argparse
	import logging

	########################################################################################################################
	logging.basicConfig(level=logging.INFO, datefmt='%m/%d/%Y %I:%M:%S %p', format='%(asctime)s - %(name)s - %(message)s')
	FROM envoyproxy/envoy:latest

	RUN apt-get update && apt-get -q install -y \
	curl
	CMD /usr/local/bin/envoy -c /etc/front-envoy.yaml --service-cluster front-proxy
	package main

	import (
	"bytes"
	"fmt"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
	)
	==================
	==================
	WARNING: DATA RACE
	Read at 0x00c4200259b8 by goroutine 14:
	runtime.slicecopy()
	/usr/local/Cellar/go/1.10.2/libexec/src/runtime/slice.go:192 +0x0
	github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.copyArgs()
	/Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/args.go:320 +0x27a
	github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.(*RequestHeader).CopyTo()
	/Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/header.go:703 +0x7e0
	https://stackoverflow.com/questions/33878433/spark-write-avro-file
	http://www.bigdatatidbits.cc/2015/01/how-to-load-some-avro-data-into-spark.html
	https://stackoverflow.com/questions/33899417/avro-schema-to-spark-structtype/
	https://stackoverflow.com/questions/36078420/spark-avro-to-parquet
	https://github.com/tomwhite/hadoop-book/blob/master/ch19-spark/src/test/scala/RDDCreationTest.scala
	https://gist.github.com/MLnick/5864741781b9340cb211
	http://alvincjin.blogspot.com/2015/11/append-spark-dataframe-with-new-column.html
	https://stackoverflow.com/questions/27033823/how-to-overwrite-the-output-directory-in-spark
	https://gist.github.com/yzhong52/f81e929e5810271292bd08856e2f4512
	https://stackoverflow.com/questions/41567859/extract-a-column-value-from-a-spark-dataframe-and-add-it-to-another-dataframe
	// reference: https://stackoverflow.com/questions/36795680/copy-schema-from-one-dataframe-to-another-dataframe?rq=1

	case class Person(Dummy: String, Name: String, Timestamp: String, Age: Int)

	val personDF = spark.sparkContext.parallelize(Seq(Person("dummy", "Ray", "12345", 23), Person("dummy", "John", "12345", 44))).toDF()

	val personSchema = StructType(
	Seq(StructField("Name", StringType, true),
	StructField("Age", IntegerType, true)))
	var dataRDD = spark.sparkContext.emptyRDD[Row]
	select mm_date, COUNT(DISTINCT mm_id)
	from data1 join data2 on (data1.mm_id = data2.mm_id)
	where mm_date between '2018-01-22' and '2018-01-22'
	and data2.type <> 'test1' and data2.type <> 'test2'


	FAILED: SemanticException Column mm_id Found in more than One Tables/Subqueries
	select im_date, im_id, Count(*)
	from data
	where im_date between '2018-01-22' and '2018-01-22'
	and im_id=12345
	group by im_date, im_id
	import java.util.HashMap

	import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord }
	import org.apache.spark.SparkConf
	import org.apache.spark.streaming._
	import org.apache.spark.streaming.kafka._
	import org.apache.spark.streaming.{ Seconds, StreamingContext }
	import org.apache.spark.SparkContext._
	import org.apache.spark.streaming.twitter._
	import org.apache.spark.SparkConf