Skip to content

Instantly share code, notes, and snippets.

View gautamsingaraju's full-sized avatar

Gautam Singaraju gautamsingaraju

View GitHub Profile
#export PS1="\[\033[36m\]\u\[\033[m\]@\[\033[32m\]\h:\[\033[33;1m\]\w\[\033[m\]\$ "
export CLICOLOR=1
export LSCOLORS=ExFxCxDxBxegedabagacad

# cdf: change directory to the folder open in the frontmost Finder window.
# macOS only — relies on osascript/AppleScript to query Finder.
# Prints the new working directory on success; warns on stderr and returns
# non-zero when no Finder window is open.
# NOTE(review): the original paste was truncated before the closing `fi`/`}`;
# the completion below follows directly from the visible if/else structure.
cdf() {
    local target
    target=$(osascript -e 'tell application "Finder" to if (count of Finder windows) > 0 then get POSIX path of (target of front Finder window as text)')
    if [ -n "$target" ]; then
        # Quote the path: Finder folders routinely contain spaces.
        cd "$target" || return
        pwd
    else
        echo 'No Finder window found' >&2
        return 1
    fi
}
// NOTE(review): this gist preview is truncated — the braces below do NOT
// balance (neither configure() nor the inner catch block is ever closed),
// so this fragment will not compile as shown. Annotating only; do not
// restyle until the full source is available.
// Local copies of files distributed via Hadoop's DistributedCache,
// populated in configure() below.
private Path[] localFiles;
// Mapper/Reducer lifecycle hook (old Hadoop API): called once with the job
// configuration before any records are processed.
public void configure(JobConf job) {
// Get the cached archives/files
try {
// Default to an empty array; immediately overwritten on the next line
// (the initializer only matters if getLocalCacheFiles were to be skipped).
localFiles = new Path[0];
// May return null when no files were cached — callers indexing
// localFiles[0] should check for that. TODO confirm.
localFiles = DistributedCache.getLocalCacheFiles(job);
//Access the files you put in the cache as localFiles[0].toString() etc.
} catch (IOException e) {
System.err.println("Caught exception while getting cached files: " + StringUtils.stringifyException(e));
//To change body of catch statement use File | Settings | File Templates.
// NOTE(review): adding a cache file inside the *read* failure handler is
// suspect — addCacheFile is normally driver-side setup. Also, `conf` is
// not declared in this scope (the parameter is `job`); presumably this
// line was pasted from the job-submission code. Verify against the
// original gist.
try {
DistributedCache.addCacheFile(new URI("/user/hadoop/GeoLiteCity.dat"), conf);
} catch (URISyntaxException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
// NOTE(review): truncated gist preview — main() is cut off after creating
// reduceConf (no ChainReducer wiring, no input/output paths, no runJob call,
// and no closing brace). Annotating only until the full source is visible.
// Driver entry point: configures a ChainMapper-based "Indexer" job
// (old Hadoop mapred API).
public static void main(String[] args) {
JobClient client = new JobClient();
// NOTE(review): lowercase `chainMapper` — presumably the enclosing driver
// class; confirm it is not a typo for ChainMapper.class.
JobConf conf = new JobConf(chainMapper.class);
conf.setJobName("Indexer");
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
// Per-mapper config; `false` = do not load default resources.
JobConf mapAConf = new JobConf(false);
// Chain link 1: LineIndexMapper, LongWritable/Text in, Text/Text out;
// `true` = pass key/value by reference between chain links.
ChainMapper.addMapper(conf, LineIndexMapper.class, LongWritable.class, Text.class, Text.class, Text.class, true, mapAConf);
JobConf reduceConf = new JobConf(false);
import com.maxmind.geoip.Location;
import com.maxmind.geoip.LookupService;
import java.io.IOException;
/**
* Created by IntelliJ IDEA.
* User: Gautam
* Date: Dec 16, 2009
* Time: 11:32:23 AM
* To change this template use File | Settings | File Templates.
*/
package util;
import org.jivesoftware.smack.*;
import org.jivesoftware.smack.packet.Message;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Iterator;
import java.text.SimpleDateFormat;
/**
* Created by IntelliJ IDEA.
* User: Gautam
* Date: Jan 15, 2009
* Time: 9:35:12 AM
* To change this template use File | Settings | File Templates.
*/
import java.io.*;
/*
* Parts of the code is from: http://spark.apache.org/docs/latest/mllib-clustering.html
*
* Modifications:
* 1. use sql context instead of sc in Zeppelin
* 2. use cluster information to predict which cluster does a sample belong in.
* 3. register a scala method as a UDF
*/
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
%sql
-- Zeppelin SQL paragraph: preview the Audience table registered
-- earlier in this notebook.
select * from Audience
//import org.apache.commons.io.IOUtils
//import java.net.URL
//import java.nio.charset.Charset
// Zeppelin creates and injects sc (SparkContext) and sqlContext (HiveContext or SqlContext)
// So you don't need create them manually
// load bank data
// Load the training set as raw CSV lines; Zeppelin injects `sc` for us.
val input = sc.textFile("/dataset/train.csv")
// Parser for the dataset's "yyyy-MM-dd HH:mm:ss" timestamp columns.
// Fixed: `java.text.Simple.DateFormat` is not a real class — the JDK type
// is java.text.SimpleDateFormat (the stray dot would not compile).
val format = new java.text.SimpleDateFormat("yyyy-MM-dd HH:mm:ss")