Malaikannan Sankarasubbu (malaikannan)
San Francisco, CA
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.Serializable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

// Custom Hadoop key type backed by a color name. The gist preview is truncated
// here; the field and method bodies below are a minimal, assumed completion.
public class ColorWritable implements WritableComparable<ColorWritable>, Serializable {
  private Text color = new Text();
  public void write(DataOutput out) throws IOException { color.write(out); }
  public void readFields(DataInput in) throws IOException { color.readFields(in); }
  public int compareTo(ColorWritable other) { return color.compareTo(other.color); }
}
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Buffers every value for a key before processing; Text/IntWritable types are assumed here.
public class SampleReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    List<IntWritable> list = new ArrayList<IntWritable>();
    for (IntWritable value : values)
      list.add(value);
    // Do something with the buffer
  }
}

// Imports for a classic (org.apache.hadoop.mapred) API job; the gist preview is truncated here.
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
malaikannan / pom.xml
Created December 4, 2015 05:18
POM.XML
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.malai.company</groupId>
  <artifactId>POC</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <dependencies>
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <scope>system</scope>
      <!-- The gist preview is truncated here; the version and systemPath below are assumed. -->
      <version>1.7</version>
      <systemPath>${java.home}/../lib/tools.jar</systemPath>
    </dependency>
  </dependencies>
</project>
malaikannan / spark_ipython.py
Created December 10, 2015 14:06
Ipython for Spark
import os
import sys

# Point the interpreter at a local Spark install so pyspark can be imported from IPython.
os.environ['SPARK_HOME'] = "C:/spark/spark-1.5.1-bin-hadoop2.6"
sys.path.append("C:/spark/spark-1.5.1-bin-hadoop2.6/bin")
sys.path.append("C:/spark/spark-1.5.1-bin-hadoop2.6/python")
sys.path.append("C:/spark/spark-1.5.1-bin-hadoop2.6/python/pyspark")
sys.path.append("C:/spark/spark-1.5.1-bin-hadoop2.6/python/lib")
sys.path.append("C:/spark/spark-1.5.1-bin-hadoop2.6/python/lib/pyspark.zip")
sys.path.append("C:/spark/spark-1.5.1-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip")

# The original gist uses `sc` directly (it already exists in the pyspark shell); creating it
# explicitly here is an assumption so the script also runs from a plain IPython session.
from pyspark import SparkContext
sc = SparkContext("local[*]", "spark_ipython")

rdd = sc.textFile("C:/spark/spark-1.5.1-bin-hadoop2.6/README.md")
rdd.count()
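
With the context set up, the README read above can also be put through a quick word count from the same IPython session; this small snippet is illustrative and not part of the original gist:

word_counts = (rdd.flatMap(lambda line: line.split())
                  .map(lambda word: (word, 1))
                  .reduceByKey(lambda a, b: a + b))
print(word_counts.take(5))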
name := "sample_sbt_spark_poc"

version := "1.0"

scalaVersion := "2.10.4"

libraryDependencies ++= Seq(
  "org.apache.spark" % "spark-core_2.10" % "1.6.0"
)
/**
 * Created by malai on 2/1/16.
 */
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

// Truncated in the gist preview after main(); the body below is a minimal, assumed word count.
object WordCount {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("WordCount"))
    val counts = sc.textFile(args(0)).flatMap(_.split("\\s+")).map(w => (w, 1)).reduceByKey(_ + _)
    counts.saveAsTextFile(args(1))
    sc.stop()
  }
}
import numpy as np

# Sigmoid activation function.
# This can be replaced by other activation functions like ReLU, tanh, etc.
# With derivative=True, x is assumed to already be a sigmoid output.
def sigmoid(x, derivative=False):
    if derivative:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

# l2 is the network output and y the target labels (defined in the elided part of the gist).
l2_error = y - l2
error = np.mean(np.abs(l2_error))
print(error)

# Calculate the weight update using the gradient (error scaled by the sigmoid slope).
l2_delta = l2_error * sigmoid(l2, derivative=True)
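
The fragment above leaves out the forward pass and the weight updates that l2_delta feeds into. A minimal sketch of the surrounding two-layer training loop, assuming a toy dataset and synapse matrices named syn0/syn1 (these names and shapes are illustrative, not taken from the gist):

import numpy as np

def sigmoid(x, derivative=False):
    # With derivative=True, x is assumed to already be a sigmoid output.
    if derivative:
        return x * (1 - x)
    return 1 / (1 + np.exp(-x))

# Toy dataset: 4 samples, 3 input features, 1 binary target each.
X = np.array([[0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
y = np.array([[0], [1], [1], [0]])

np.random.seed(1)
syn0 = 2 * np.random.random((3, 4)) - 1  # input -> hidden weights
syn1 = 2 * np.random.random((4, 1)) - 1  # hidden -> output weights

for step in range(60000):
    # Forward pass through both layers.
    l1 = sigmoid(np.dot(X, syn0))
    l2 = sigmoid(np.dot(l1, syn1))

    # Output error and its gradient, as in the fragment above.
    l2_error = y - l2
    l2_delta = l2_error * sigmoid(l2, derivative=True)

    # Backpropagate the error to the hidden layer.
    l1_error = l2_delta.dot(syn1.T)
    l1_delta = l1_error * sigmoid(l1, derivative=True)

    # Weight updates (no explicit learning rate, matching the gist's style).
    syn1 += l1.T.dot(l2_delta)
    syn0 += X.T.dot(l1_delta)

    if step % 10000 == 0:
        print(np.mean(np.abs(l2_error)))

The error printed every 10,000 steps should shrink toward zero as the weights converge on the toy targets.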