==================
==================
WARNING: DATA RACE
Read at 0x00c4200259b8 by goroutine 14:
runtime.slicecopy()
/usr/local/Cellar/go/1.10.2/libexec/src/runtime/slice.go:192 +0x0
github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.copyArgs()
/Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/args.go:320 +0x27a
github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.(*RequestHeader).CopyTo()
/Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/header.go:703 +0x7e0
https://stackoverflow.com/questions/33878433/spark-write-avro-file
http://www.bigdatatidbits.cc/2015/01/how-to-load-some-avro-data-into-spark.html
https://stackoverflow.com/questions/33899417/avro-schema-to-spark-structtype/
https://stackoverflow.com/questions/36078420/spark-avro-to-parquet
https://github.com/tomwhite/hadoop-book/blob/master/ch19-spark/src/test/scala/RDDCreationTest.scala
https://gist.github.com/MLnick/5864741781b9340cb211
http://alvincjin.blogspot.com/2015/11/append-spark-dataframe-with-new-column.html
https://stackoverflow.com/questions/27033823/how-to-overwrite-the-output-directory-in-spark
https://gist.github.com/yzhong52/f81e929e5810271292bd08856e2f4512
https://stackoverflow.com/questions/41567859/extract-a-column-value-from-a-spark-dataframe-and-add-it-to-another-dataframe
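Taken together, these links cover one workflow: read Avro into a Spark DataFrame, append a column, and write the result as Parquet while overwriting the output directory. A minimal sketch of that flow follows; the paths, the column name, and the choice of the spark-avro data source ("com.databricks.spark.avro" on older Spark, the built-in "avro" format from Spark 2.4) are assumptions for illustration, not code from any of the linked posts.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.lit

object AvroToParquetSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("AvroToParquetSketch").getOrCreate()

    // Load Avro into a DataFrame (hypothetical input path)
    val df = spark.read.format("com.databricks.spark.avro").load("hdfs:///data/input.avro")

    // Append a new column; a value extracted from another DataFrame can be added the same way via lit(...)
    val withDate = df.withColumn("load_date", lit("2018-01-22"))

    // Overwrite the output directory and write Parquet (hypothetical output path)
    withDate.write.mode("overwrite").parquet("hdfs:///data/output_parquet")

    spark.stop()
  }
}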
@Arnold1
Arnold1 / main.sql
Created January 23, 2018 17:20
query
select mm_date, COUNT(DISTINCT mm_id)
from data1 join data2 on (data1.mm_id = data2.mm_id)
where mm_date between '2018-01-22' and '2018-01-22'
and data2.type <> 'test1' and data2.type <> 'test2'
FAILED: SemanticException Column mm_id Found in more than One Tables/Subqueries
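-- Note: Hive raises this SemanticException because mm_id exists in both data1 and data2;
-- qualifying the column (e.g. COUNT(DISTINCT data1.mm_id)) resolves the ambiguity.
-- Once fixed, the query will also need GROUP BY mm_date, since mm_date is selected alongside an aggregate.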
select im_date, im_id, Count(*)
from data
where im_date between '2018-01-22' and '2018-01-22'
and im_id=12345
group by im_date, im_id
@Arnold1
Arnold1 / KafkaSparkPopularHashTags.scala
Created November 11, 2017 03:26 — forked from stdatalabs/KafkaSparkPopularHashTags.scala
A Spark Streaming - Kafka integration that receives Twitter data from a Kafka topic and finds the popular hashtags. More @ stdatalabs.blogspot.com
import java.util.HashMap
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord }
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.SparkConf
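The listing truncates this gist after its imports. As a rough reconstruction of what the description promises (consuming tweet text from a Kafka topic and counting hashtags over a window), a sketch follows, relying on the imports shown above; the ZooKeeper quorum, consumer group, topic name, and window length are placeholders, not values from the original gist.

// Sketch only: connection and window settings below are assumed placeholders.
val sparkConf = new SparkConf().setAppName("KafkaSparkPopularHashTags")
val ssc = new StreamingContext(sparkConf, Seconds(2))

// Receiver-based Kafka stream, matching the org.apache.spark.streaming.kafka._ import above;
// each record is a (key, message) pair, so keep only the tweet text.
val tweets = KafkaUtils.createStream(ssc, "localhost:2181", "spark-consumer", Map("tweets" -> 1))
  .map(_._2)

// Pull out hashtags and count them over a 60-second window.
val hashTagCounts = tweets.flatMap(_.split(" "))
  .filter(_.startsWith("#"))
  .map((_, 1))
  .reduceByKeyAndWindow(_ + _, Seconds(60))

// Print the ten most frequent hashtags of each window.
hashTagCounts.foreachRDD { rdd =>
  rdd.sortBy(_._2, ascending = false).take(10).foreach(println)
}

ssc.start()
ssc.awaitTermination()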
@Arnold1
Arnold1 / SparkPopularHashTags.scala
Created November 11, 2017 03:23 — forked from stdatalabs/SparkPopularHashTags.scala
TwitterPopularHashTags using Spark Streaming. More @ stdatalabs.blogspot.com
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming._
import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.flume._
/**
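This fork reads from Twitter directly rather than from Kafka; the preview is cut off at the header comment. A short sketch of the source side follows (the windowed counting would look like the Kafka sketch above); the master URL and batch duration are placeholders, and Twitter OAuth credentials are assumed to be supplied via the usual twitter4j.oauth.* system properties.

// Sketch only: master and batch duration are placeholders.
val conf = new SparkConf().setAppName("SparkPopularHashTags").setMaster("local[2]")
val ssc = new StreamingContext(conf, Seconds(5))

// None = build the twitter4j authorization from the twitter4j.oauth.* system properties.
val stream = TwitterUtils.createStream(ssc, None)

// Each element is a twitter4j.Status; extract hashtags from the tweet text.
val hashTags = stream.flatMap(status => status.getText.split(" ").filter(_.startsWith("#")))

// From here the counting is the same windowed reduceByKeyAndWindow as in the Kafka sketch above.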
@Arnold1
Arnold1 / TweetStreams.scala
Created November 11, 2017 03:21 — forked from samklr/TweetStreams.scala
TweetStream with Spark
import Utils
import org.apache.spark.streaming.StreamingContext._
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.{SparkConf, SparkContext}
@Arnold1
Arnold1 / 00-OozieWorkflowHdfsAndEmailActions
Created November 8, 2017 20:23 — forked from airawat/00-OozieWorkflowHdfsAndEmailActions
Oozie workflow application with FS and email actions; Includes sample data, workflow components, commands.
This gist includes components of a simple workflow application that creates a directory and moves files
within HDFS to this directory.
Emails are sent out to notify designated users of the success or failure of the workflow. There is a prepare
section to allow re-running the action; the prepare essentially negates the move done by a potential prior
run of the action. Sample data is also included.
The sample application includes:
--------------------------------
1. Oozie actions: hdfs action and email action
2. Oozie workflow controls: start, end, and kill.
@Arnold1
Arnold1 / cv2ff.cpp
Created November 5, 2017 02:48 — forked from yohhoy/cv2ff.cpp
Convert from OpenCV image and write movie with FFmpeg
/*
* Convert from OpenCV image and write movie with FFmpeg
*
* Copyright (c) 2016 yohhoy
*/
#include <iostream>
#include <vector>
// FFmpeg
extern "C" {
#include <libavformat/avformat.h>