Sam Bessalah samklr

## highways.py
#!/usr/bin/env python

import bigml
from bigml.api import BigML

# You need to define BIGML_USERNAME and BIGML_API_KEY in your environment, or
# add them here:
#api = BigML(username, api_key, dev_mode=True)

api = BigML(dev_mode=True)

## SaveCountersToHdfs.scala
import java.io.PrintWriter

import cascading.stats.CascadingStats
import com.twitter.scalding._

/**
 * Writes all custom counters into a tsv file args("counters-file") if this property is set.
 *
 * Output format:
 * counter_name value

## benchmark-commands.txt
Producer

Setup
bin/kafka-topics.sh --zookeeper esv4-hcl197.grid.linkedin.com:2181 --create --topic test-rep-one --partitions 6 --replication-factor 1
bin/kafka-topics.sh --zookeeper esv4-hcl197.grid.linkedin.com:2181 --create --topic test --partitions 6 --replication-factor 3

Single thread, no replication

bin/kafka-run-class.sh org.apache.kafka.clients.tools.ProducerPerformance test7 50000000 100 -1 acks=1 bootstrap.servers=esv4-hcl198.grid.linkedin.com:9092 buffer.memory=67108864 batch.size=8196

## build.sbt
name := "so_analytics"

version := "0.0.1-SNAPSHOT"

scalaVersion := "2.10.3"

scalacOptions ++= Seq("-deprecation", "-feature")

libraryDependencies += "org.scalatest" %% "scalatest" % "1.9.1" % "test"

## ApplicationGuiceModule.java
package com.developerb.dropbot;

import com.developerb.dropbot.instrumentation.MethodInvocationTimingInterceptor;
import com.google.inject.AbstractModule;
import com.yammer.metrics.annotation.Timed;

import static com.google.inject.matcher.Matchers.annotatedWith;
import static com.google.inject.matcher.Matchers.any;

/**

## Elasticsearch install script
#!/usr/bin/env bash

# update apt
sudo apt-get update

# install java
sudo apt-get install openjdk-7-jre-headless -y

# install elasticsearch
wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.5.0.deb

## DataStream.scala
import scala.collection.immutable.List

import spark.SparkContext._
import spark.streaming._
import spark.streaming.StreamingContext._
import spark.streaming.dstream._

object DataStream extends App {

  val reportHeader = """----------------------------------------------

## gist:b37d266d219da72936a5

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                samklr
                / gist:b37d266d219da72936a5
            
            
              Last active
              August 29, 2015 14:18
                — forked from debasishg/gist:b4df1648d3f1776abdff
            
          
Feature Learning


Learning Feature Representations with K-means by Adam Coates and Andrew Y. Ng
The devil is in the details: an evaluation of recent feature encoding methods by Chatfield et al.
Emergence of Object-Selective Features in Unsupervised Feature Learning by Coates, Ng
Scaling Learning Algorithms towards AI by Bengio & LeCun


Deep Learning


Dropout: A Simple Way to Prevent Neural Networks from Overfitting by Nitish Srivastava, Geoffrey Hinton, Alex Krizhevsky, Ilya Sutskever and Ruslan Salakhutdinov
[Understanding the difficulty of training deep feedforward neural networks](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf) by Xavier Glorot and Yoshua Bengio


## kafka.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                samklr
                / kafka.md
            
            
              Last active
              August 29, 2015 14:18
                — forked from ashrithr/kafka.md
            
          
    Introduction to Kafka

Kafka acts as a kind of write-ahead log (WAL): it records messages to a persistent store (disk) and lets subscribers read those changes and apply them to their own stores at whatever pace suits each system.
Terminology:

Producers send messages to brokers
Consumers read messages from brokers
Messages are sent to a topic


## kafka-move-leadership.sh
#!/usr/bin/env bash
#
# File:      kafka-move-leadership.sh
#
# Description
# ===========
#
# Generates a Kafka partition reassignment JSON snippet to STDOUT to move the leadership
# of any replicas away from the provided "source" broker to different, randomly selected
# "target" brokers.  Run this script with `-h` to show detailed usage instructions.
	#!/usr/bin/env python

	import bigml
	from bigml.api import BigML

	# You need to define BIGML_USERNAME and BIGML_API_KEY in your environment, or
	# add them here:
	#api = BigML(username, api_key, dev_mode=True)

	api = BigML(dev_mode=True)
	import java.io.PrintWriter

	import cascading.stats.CascadingStats
	import com.twitter.scalding._

	/**
	* Writes all custom counters into a tsv file args("counters-file") if this property is set.
	*
	* Output format:
	* counter_name value
	Producer

	Setup
	bin/kafka-topics.sh --zookeeper esv4-hcl197.grid.linkedin.com:2181 --create --topic test-rep-one --partitions 6 --replication-factor 1
	bin/kafka-topics.sh --zookeeper esv4-hcl197.grid.linkedin.com:2181 --create --topic test --partitions 6 --replication-factor 3

	Single thread, no replication

	bin/kafka-run-class.sh org.apache.kafka.clients.tools.ProducerPerformance test7 50000000 100 -1 acks=1 bootstrap.servers=esv4-hcl198.grid.linkedin.com:9092 buffer.memory=67108864 batch.size=8196
	name := "so_analytics"

	version := "0.0.1-SNAPSHOT"

	scalaVersion := "2.10.3"

	scalacOptions ++= Seq("-deprecation", "-feature")

	libraryDependencies += "org.scalatest" %% "scalatest" % "1.9.1" % "test"
	package com.developerb.dropbot;

	import com.developerb.dropbot.instrumentation.MethodInvocationTimingInterceptor;
	import com.google.inject.AbstractModule;
	import com.yammer.metrics.annotation.Timed;

	import static com.google.inject.matcher.Matchers.annotatedWith;
	import static com.google.inject.matcher.Matchers.any;

	/**
	#!/usr/bin/env bash

	# update apt
	sudo apt-get update

	# install java
	sudo apt-get install openjdk-7-jre-headless -y

	# install elasticsearch
	wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.5.0.deb
	import scala.collection.immutable.List

	import spark.SparkContext._
	import spark.streaming._
	import spark.streaming.StreamingContext._
	import spark.streaming.dstream._

	object DataStream extends App {

	val reportHeader = """----------------------------------------------
	#!/usr/bin/env bash
	#
	# File: kafka-move-leadership.sh
	#
	# Description
	# ===========
	#
	# Generates a Kafka partition reassignment JSON snippet to STDOUT to move the leadership
	# of any replicas away from the provided "source" broker to different, randomly selected
	# "target" brokers. Run this script with `-h` to show detailed usage instructions.