Skip to content

Instantly share code, notes, and snippets.

View gautamsingaraju's full-sized avatar

Gautam Singaraju gautamsingaraju

View GitHub Profile
#export PS1="\[\033[36m\]\u\[\033[m\]@\[\033[32m\]\h:\[\033[33;1m\]\w\[\033[m\]\$ "
export CLICOLOR=1
export LSCOLORS=ExFxCxDxBxegedabagacad

# cdf: change directory to the folder open in the frontmost Finder window.
# macOS only — relies on osascript/AppleScript to query Finder.
# Prints the new working directory on success; warns on stderr and returns
# non-zero when no Finder window is open.
# NOTE(review): the original paste was truncated before the closing `fi`/`}`;
# the completion below follows directly from the visible if/else structure.
cdf() {
    local target
    target=$(osascript -e 'tell application "Finder" to if (count of Finder windows) > 0 then get POSIX path of (target of front Finder window as text)')
    if [ -n "$target" ]; then
        # Quote the path: Finder folders routinely contain spaces.
        cd "$target" || return
        pwd
    else
        echo 'No Finder window found' >&2
        return 1
    fi
}
// NOTE(review): this gist preview is truncated — the braces below do NOT
// balance (neither configure() nor the inner catch block is ever closed),
// so this fragment will not compile as shown. Annotating only; do not
// restyle until the full source is available.
// Local copies of files distributed via Hadoop's DistributedCache,
// populated in configure() below.
private Path[] localFiles;
// Mapper/Reducer lifecycle hook (old Hadoop API): called once with the job
// configuration before any records are processed.
public void configure(JobConf job) {
// Get the cached archives/files
try {
// Default to an empty array; immediately overwritten on the next line
// (the initializer only matters if getLocalCacheFiles were to be skipped).
localFiles = new Path[0];
// May return null when no files were cached — callers indexing
// localFiles[0] should check for that. TODO confirm.
localFiles = DistributedCache.getLocalCacheFiles(job);
//Access the files you put in the cache as localFiles[0].toString() etc.
} catch (IOException e) {
System.err.println("Caught exception while getting cached files: " + StringUtils.stringifyException(e));
//To change body of catch statement use File | Settings | File Templates.
// NOTE(review): adding a cache file inside the *read* failure handler is
// suspect — addCacheFile is normally driver-side setup. Also, `conf` is
// not declared in this scope (the parameter is `job`); presumably this
// line was pasted from the job-submission code. Verify against the
// original gist.
try {
DistributedCache.addCacheFile(new URI("/user/hadoop/GeoLiteCity.dat"), conf);
} catch (URISyntaxException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
// NOTE(review): truncated gist preview — main() is cut off after creating
// reduceConf (no ChainReducer wiring, no input/output paths, no runJob call,
// and no closing brace). Annotating only until the full source is visible.
// Driver entry point: configures a ChainMapper-based "Indexer" job
// (old Hadoop mapred API).
public static void main(String[] args) {
JobClient client = new JobClient();
// NOTE(review): lowercase `chainMapper` — presumably the enclosing driver
// class; confirm it is not a typo for ChainMapper.class.
JobConf conf = new JobConf(chainMapper.class);
conf.setJobName("Indexer");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
// Per-mapper config; `false` = do not load default resources.
JobConf mapAConf = new JobConf(false);
// Chain link 1: LineIndexMapper, LongWritable/Text in, Text/Text out;
// `true` = pass key/value by reference between chain links.
ChainMapper.addMapper(conf, LineIndexMapper.class, LongWritable.class, Text.class, Text.class, Text.class, true, mapAConf);
JobConf reduceConf = new JobConf(false);
import com.maxmind.geoip.Location;
import com.maxmind.geoip.LookupService;
import java.io.IOException;
/**
* Created by IntelliJ IDEA.
* User: Gautam
* Date: Dec 16, 2009
* Time: 11:32:23 AM
* To change this template use File | Settings | File Templates.
*/
package util;
import org.jivesoftware.smack.*;
import org.jivesoftware.smack.packet.Message;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.text.SimpleDateFormat;
/**
* Created by IntelliJ IDEA.
* User: Gautam
* Date: Jan 15, 2009
* Time: 9:35:12 AM
* To change this template use File | Settings | File Templates.
*/
import java.io.*;
/*
* Parts of the code is from: http://spark.apache.org/docs/latest/mllib-clustering.html
*
* Modifications:
* 1. use sql context instead of sc in Zeppelin
* 2. use cluster information to predict which cluster does a sample belong in.
* 3. register a scala method as a UDF
*/
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
%sql
-- Zeppelin SQL paragraph: preview the Audience table registered
-- earlier in this notebook.
select * from Audience
//import org.apache.commons.io.IOUtils
//import java.net.URL
//import java.nio.charset.Charset
// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)
// So you don't need create them manually
// load bank data
// Load the training set as raw CSV lines; Zeppelin injects `sc` for us.
val input = sc.textFile("/dataset/train.csv")
// Parser for the dataset's "yyyy-MM-dd HH:mm:ss" timestamp columns.
// Fixed: `java.text.Simple.DateFormat` is not a real class — the JDK type
// is java.text.SimpleDateFormat (the stray dot would not compile).
val format = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss")