Skip to content

Instantly share code, notes, and snippets.

View Dockerfile-fontenvoy
# Front-proxy image: the stock Envoy image plus curl, started with the
# front-envoy configuration file.
# NOTE(review): `latest` is a floating tag, so rebuilds are not reproducible.
# Pin a specific Envoy release (ideally by digest) once the target version is
# confirmed to be compatible with /etc/front-envoy.yaml.
FROM envoyproxy/envoy:latest

# Install curl. `update` and `install` share one layer so the package index is
# never stale; recommended extras are skipped, and the apt lists are removed in
# the same layer so they do not persist in the image.
# ca-certificates is listed explicitly because it is only a "recommended"
# dependency of curl and would otherwise be dropped by --no-install-recommends.
RUN apt-get update \
    && apt-get -q install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Exec (JSON-array) form: Envoy is launched without a wrapping `/bin/sh -c`,
# so signals such as SIGTERM from `docker stop` are not swallowed by a shell.
CMD ["/usr/local/bin/envoy", "-c", "/etc/front-envoy.yaml", "--service-cluster", "front-proxy"]
@Arnold1
Arnold1 / go
Created Apr 10, 2019 — forked from CarterTsai/go
golang s3 PutObject
View go
package main
import (
"bytes"
"fmt"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
)
View gist:44a5345ab0cc7021f9e0f59dc5ff172f
==================
==================
WARNING: DATA RACE
Read at 0x00c4200259b8 by goroutine 14:
runtime.slicecopy()
/usr/local/Cellar/go/1.10.2/libexec/src/runtime/slice.go:192 +0x0
github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.copyArgs()
/Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/args.go:320 +0x27a
github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp.(*RequestHeader).CopyTo()
/Users/geri/intelligence/src/github.com/ABC/byoa-price-engine/vendor/github.com/valyala/fasthttp/header.go:703 +0x7e0
View gist:769555c0c712a63ddb7bc2adfe307faa
https://stackoverflow.com/questions/33878433/spark-write-avro-file
http://www.bigdatatidbits.cc/2015/01/how-to-load-some-avro-data-into-spark.html
https://stackoverflow.com/questions/33899417/avro-schema-to-spark-structtype/
https://stackoverflow.com/questions/36078420/spark-avro-to-parquet
https://github.com/tomwhite/hadoop-book/blob/master/ch19-spark/src/test/scala/RDDCreationTest.scala
https://gist.github.com/MLnick/5864741781b9340cb211
http://alvincjin.blogspot.com/2015/11/append-spark-dataframe-with-new-column.html
https://stackoverflow.com/questions/27033823/how-to-overwrite-the-output-directory-in-spark
https://gist.github.com/yzhong52/f81e929e5810271292bd08856e2f4512
https://stackoverflow.com/questions/41567859/extract-a-column-value-from-a-spark-dataframe-and-add-it-to-another-dataframe
@Arnold1
Arnold1 / main.scala
Created Feb 19, 2018
copy schema from one dataframe to another dataframe
View main.scala
// reference: https://stackoverflow.com/questions/36795680/copy-schema-from-one-dataframe-to-another-dataframe?rq=1
// Row type for the sample data: String columns Dummy, Name and Timestamp, plus an Int Age.
case class Person(Dummy: String, Name: String, Timestamp: String, Age: Int)
// Sample DataFrame holding two Person rows, built by parallelizing a local Seq.
val personDF = spark.sparkContext.parallelize(Seq(Person("dummy", "Ray", "12345", 23), Person("dummy", "John", "12345", 44))).toDF()
// Schema containing only the Name and Age columns, both declared nullable.
val personSchema = StructType(
Seq(StructField("Name", StringType, true),
StructField("Age", IntegerType, true)))
// Starts as an empty RDD[Row]; declared `var`, presumably so later code can reassign it — TODO confirm.
var dataRDD = spark.sparkContext.emptyRDD[Row]
View main.sql
-- Selects mm_date and the number of distinct mm_id values from data1 joined to
-- data2 on mm_id, limited to 2018-01-22 and excluding data2 rows whose type is
-- 'test1' or 'test2'.
-- As written the query is rejected: the join condition shows that both data1
-- and data2 have an mm_id column, so the unqualified mm_id inside
-- COUNT(DISTINCT ...) is ambiguous and needs a table qualifier (e.g. data1.mm_id).
-- NOTE(review): mm_date is selected next to an aggregate with no GROUP BY
-- visible here — confirm whether `group by mm_date` is missing.
select mm_date, COUNT(DISTINCT mm_id)
from data1 join data2 on (data1.mm_id = data2.mm_id)
where mm_date between '2018-01-22' and '2018-01-22'
and data2.type <> 'test1' and data2.type <> 'test2'
FAILED: SemanticException Column mm_id Found in more than One Tables/Subqueries
View gist:9ec8de27d0918f8a58f3b9c456714594
-- Row count for im_id 12345 on 2018-01-22, grouped by (im_date, im_id).
SELECT
    im_date,
    im_id,
    COUNT(*)
FROM data
WHERE im_date BETWEEN '2018-01-22' AND '2018-01-22'
  AND im_id = 12345
GROUP BY
    im_date,
    im_id
@Arnold1
Arnold1 / KafkaSparkPopularHashTags.scala
Created Nov 11, 2017 — forked from stdatalabs/KafkaSparkPopularHashTags.scala
A Spark Streaming - Kafka integration to receive twitter data from kafka topic and find the popular hashtags. More @ stdatalabs.blogspot.com
View KafkaSparkPopularHashTags.scala
import java.util.HashMap
import org.apache.kafka.clients.producer.{ KafkaProducer, ProducerConfig, ProducerRecord }
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka._
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.SparkConf
@Arnold1
Arnold1 / SparkPopularHashTags.scala
Created Nov 11, 2017 — forked from stdatalabs/SparkPopularHashTags.scala
TwitterPopularHashTags using Spark Streaming. More @ stdatalabs.blogspot.com
View SparkPopularHashTags.scala
import org.apache.spark.streaming.{ Seconds, StreamingContext }
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.SparkConf
import org.apache.spark.streaming._
import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.flume._
/**
@Arnold1
Arnold1 / TweetStreams.scala
Created Nov 11, 2017 — forked from samklr/TweetStreams.scala
TweetStream with Spark
View TweetStreams.scala
import Utils
import org.apache.spark.streaming.StreamingContext._
import org.apache.spark.streaming.{Seconds, StreamingContext}
import StreamingContext._
import org.apache.spark.SparkContext._
import org.apache.spark.streaming.twitter._
import org.apache.spark.SparkConf
import org.apache.spark.{SparkConf, SparkContext}