Ben Fradet (BenFradet)
💭 Engineering @47deg
Marseille area, France
BenFradet / reviews.sh
Last active July 28, 2017 12:35
Take every unstaged file and fold its changes into the last commit that touched that file; particularly useful when applying changes requested in a review.
#!/bin/bash
# For each unstaged file, create a fixup commit targeting the last commit
# that touched it; squash them afterwards with `git rebase -i --autosquash`.
IFS=$'\n' read -d '' -r -a files <<< "$(git diff --name-only)"
for file in "${files[@]}"
do
  # h = short hash, m = message of the last commit touching this file
  read -r h m <<< "$(git log -n 1 --oneline -- "$file")"
  git add "$file"
  git commit --fixup "$h"
done
BenFradet / build.sbt
Created July 29, 2017 14:09
circe-fs2 example
lazy val fs2Version = "0.9.7"
lazy val circeVersion = "0.8.0"
lazy val root = (project in file("."))
  .settings(
    name := "circe-fs2-example",
    scalaVersion := "2.12.3",
    libraryDependencies ++=
      "co.fs2" %% "fs2-io" % fs2Version +: Seq(
        "io.circe" %% "circe-generic",
        "io.circe" %% "circe-parser" // assumed module; the gist is truncated here
      ).map(_ % circeVersion)
  )
val l = (1 to 1000).map {
case i if i % 2 == 0 => "even" -> i
case i => "odd" -> i
}
// https://github.com/Ichoran/thyme
th.pbench(l.groupBy(_._1).mapValues(_.map(_._2)))
//Benchmark (5100 calls in 334.7 ms)
//Time: 38.85 us 95% CI 37.74 us - 39.95 us (n=18)
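The benchmarked `groupBy(_._1).mapValues(_.map(_._2))` pattern traverses the groups a second time, and in Scala 2.12 `mapValues` returns a non-strict view that recomputes the mapping on each access. A hedged single-pass alternative using `foldLeft` (a sketch, not from the original gist) could look like:

```scala
// Same 1000-element list of ("even"/"odd", i) pairs as above.
val l = (1 to 1000).map {
  case i if i % 2 == 0 => "even" -> i
  case i               => "odd" -> i
}

// Build the grouped map in a single traversal, appending each value
// to the Vector already accumulated for its key.
val grouped: Map[String, Vector[Int]] =
  l.foldLeft(Map.empty[String, Vector[Int]]) {
    case (acc, (k, v)) => acc.updated(k, acc.getOrElse(k, Vector.empty) :+ v)
  }
```

Whether this actually beats `groupBy` is worth checking with the same Thyme `pbench` harness; it mainly avoids the lazy `mapValues` pitfall.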

Scala Stream Collector

| JAR | Targeted platform |
|---|---|
| snowplow-stream-collector-google-pubsub-version.jar | [Google Cloud PubSub][pubsub] |
| snowplow-stream-collector-kinesis-version.jar | [Amazon Kinesis][kinesis] |
| snowplow-stream-collector-kafka-version.jar | [Apache Kafka][kafka] |
| snowplow-stream-collector-nsq-version.jar | [NSQ][nsq] |
BenFradet / athena.txt
Created June 20, 2018 09:50
Athena enriched event schema
app_id string, platform string, etl_tstamp string, collector_tstamp string, dvce_created_tstamp string,
event string, event_id string, txn_id string,
name_tracker string, v_tracker string, v_collector string, v_etl string,
user_id string, user_ipaddress string, user_fingerprint string, domain_userid string, domain_sessionidx string, network_userid string,
geo_country string, geo_region string, geo_city string, geo_zipcode string, geo_latitude string, geo_longitude string, geo_region_name string,
ip_isp string, ip_organization string, ip_domain string, ip_netspeed string,
page_url string, page_title string, page_referrer string, page_urlscheme string, page_urlhost string, page_urlport string, page_urlpath string, page_urlquery string, page_urlfragment string,
refr_urlscheme string, refr_urlhost string, refr_urlport string, refr_urlpath string, refr_urlquery string, refr_urlfragment string, refr_medium string, refr_source string, refr_term string,
mkt_medium string, mkt_source string, mkt_term string, mkt_content string, mkt_campaign string,
contexts string, se_c
{
  "metricTime": "2018-06-26T15:59:26.823Z",
  "metrics": [
    {
      "name": {
        "origin": "dataflow/v1b3",
        "name": "ExecutionTime_ProcessElement",
        "context": {
          "step": "s12",
          "tentative": "true"

{
  "jobs": [
    {
      "id": "2018-06-26_08_56_30-1834382669836645220",
      "projectId": "engineering-sandbox",
      "name": "ben-enrich",
      "type": "JOB_TYPE_STREAMING",
      "currentState": "JOB_STATE_RUNNING",
      "currentStateTime": "2018-06-26T15:56:59.368546Z",
      "createTime": "2018-06-26T15:56:31.142093Z",
var MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'];
var color = Chart.helpers.color;
var barChartData = {
  labels: ['January', 'February', 'March', 'April', 'May', 'June', 'July'],
  datasets: [{
    label: 'Dataset 1',
    backgroundColor: color(window.chartColors.red).alpha(0.5).rgbString(),
    borderColor: window.chartColors.red,
    borderWidth: 1,
    data: [

var MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'];
var config = {
  type: 'line',
  data: {
    labels: ['January', 'February', 'March', 'April', 'May', 'June', 'July'],
    datasets: [{
      label: 'My First dataset',
      backgroundColor: window.chartColors.red,
      borderColor: window.chartColors.red,
-There is currently no workaround for scenario 1. We avoid scenario 2 by configuring our Elastic Beanstalk environment to never automatically scale down.
+There is currently no workaround for scenario 1.
+
+For scenario 2, scaling down, we can attach [a lifecycle hook](http://docs.amazonaws.cn/en_us/autoscaling/ec2/userguide/lifecycle-hooks.html)
+to the auto-scaling group so that a terminating instance spends two hours in a waiting state. Since
+log rotation to S3 runs once every hour, this guarantees that all logs on the machine are rotated before the instance is terminated.
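The lifecycle-hook setup described above could be sketched with the AWS CLI. The group and hook names below are hypothetical placeholders; the two-hour wait maps to a 7200-second heartbeat timeout:

```shell
# Sketch only: "wait-for-log-rotation" and "my-beanstalk-asg" are
# placeholder names, not from the original document.
# Attach a termination lifecycle hook so each instance waits up to
# 2 hours (7200 s) before terminating, leaving time for the hourly
# log rotation to S3 to run at least once.
aws autoscaling put-lifecycle-hook \
  --lifecycle-hook-name wait-for-log-rotation \
  --auto-scaling-group-name my-beanstalk-asg \
  --lifecycle-transition autoscaling:EC2_INSTANCE_TERMINATING \
  --heartbeat-timeout 7200 \
  --default-result CONTINUE
```

`--default-result CONTINUE` lets the termination proceed normally once the timeout expires, rather than abandoning it.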