@rishav-rohit
rishav-rohit / GeoEncodeUDF.java
Created September 30, 2014 16:09
Hive GenericUDF for getting the latitude and longitude of a location
package com.rishav.hadoop.hive.ql.udf.generic;
import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
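The preview stops at the imports. As a rough sketch of the GenericUDF contract this class has to fulfil (the geo_encode function name, the array-of-strings return type, and the GeoLatLng.getLatLng helper signature are assumptions based on the imports and the companion gist below):

import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import com.rishav.hadoop.hive.ql.udf.utils.GeoLatLng;

@Description(name = "geo_encode", value = "_FUNC_(location) - returns [latitude, longitude] for a location string")
public class GeoEncodeUDFSketch extends GenericUDF {
  private StringObjectInspector locationOI;

  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 1) {
      throw new UDFArgumentLengthException("geo_encode() takes exactly one argument");
    }
    if (!(arguments[0] instanceof StringObjectInspector)) {
      throw new UDFArgumentTypeException(0, "geo_encode() expects a string argument");
    }
    locationOI = (StringObjectInspector) arguments[0];
    // Return type: array<string> holding latitude and longitude
    return ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
  }

  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    String location = locationOI.getPrimitiveJavaObject(arguments[0].get());
    if (location == null) {
      return null;
    }
    // GeoLatLng.getLatLng returning {lat, lng} is a hypothetical signature
    // for the helper class in the next gist
    String[] parts = GeoLatLng.getLatLng(location);
    ArrayList<String> latLng = new ArrayList<String>();
    latLng.add(parts[0]); // latitude
    latLng.add(parts[1]); // longitude
    return latLng;
  }

  @Override
  public String getDisplayString(String[] children) {
    return "geo_encode(" + children[0] + ")";
  }
}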
@rishav-rohit
rishav-rohit / GeoLatLng.java
Created September 30, 2014 16:08
Helper class to get latitude and longitude for a location
package com.rishav.hadoop.hive.ql.udf.utils;
import java.io.IOException;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.methods.GetMethod;
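Given those imports, the helper most likely issues an HTTP GET against a geocoding web service and parses the response. A minimal sketch with the commons-httpclient 3.x API; the endpoint URL and the raw-response return are assumptions:

import java.io.IOException;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.httpclient.util.URIUtil;

public class GeoLatLngSketch {
  // Hypothetical endpoint; the gist does not show which geocoding service it calls
  private static final String GEOCODE_URL =
      "http://maps.googleapis.com/maps/api/geocode/json?sensor=false&address=";

  public static String fetch(String location) {
    HttpClient client = new HttpClient();
    GetMethod method = null;
    try {
      // URIUtil.encodeQuery guards against spaces in the location;
      // the URIException it throws is an IOException subtype
      method = new GetMethod(GEOCODE_URL + URIUtil.encodeQuery(location));
      // Retry failed requests up to 3 times, matching the imported retry handler
      method.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
          new DefaultHttpMethodRetryHandler(3, false));
      if (client.executeMethod(method) != HttpStatus.SC_OK) {
        return null;
      }
      return method.getResponseBodyAsString(); // response body, still to be parsed
    } catch (IOException e) {
      return null;
    } finally {
      if (method != null) {
        method.releaseConnection();
      }
    }
  }
}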
@rishav-rohit
rishav-rohit / UnionJob.java
Created September 16, 2014 07:24
HBase multi-table input union example driver
package com.rishav.hbase.union;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
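Those imports point at HBase's multi-table input path: build one Scan per source table, tag each Scan with its table name, and hand the whole list to TableMapReduceUtil. A hedged sketch of the driver wiring (the table names, output table, and mapper output types are assumptions):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class UnionJobSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "hbase-table-union");
    job.setJarByClass(UnionJobSketch.class);

    // One Scan per source table; SCAN_ATTRIBUTES_TABLE_NAME tells the
    // multi-table input format which table each Scan runs against
    List<Scan> scans = new ArrayList<Scan>();
    for (String table : new String[] { "table1", "table2" }) {
      Scan scan = new Scan();
      scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(table));
      scans.add(scan);
    }

    // All scans feed the same mapper; the generic types must match UnionMapper
    TableMapReduceUtil.initTableMapperJob(scans, UnionMapper.class,
        ImmutableBytesWritable.class, Text.class, job);
    // The reducer writes merged rows into one output table (name assumed)
    TableMapReduceUtil.initTableReducerJob("unionTable", UnionReducer.class, job);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}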
@rishav-rohit
rishav-rohit / UnionReducer.java
Created September 16, 2014 07:24
HBase multi-table input union example reducer
package com.rishav.hbase.union;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
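A hedged sketch of what a union reducer typically looks like: collapse the values that arrived for one row key into a single Put (the value type and the column family/qualifier scheme are assumptions):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

public class UnionReducerSketch
    extends TableReducer<ImmutableBytesWritable, Text, ImmutableBytesWritable> {

  private static final byte[] CF = Bytes.toBytes("cf");

  @Override
  protected void reduce(ImmutableBytesWritable key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    Put put = new Put(key.get());
    int i = 0;
    // Keep every source value by writing each into its own qualifier
    for (Text value : values) {
      put.add(CF, Bytes.toBytes("val" + i++), Bytes.toBytes(value.toString()));
    }
    context.write(key, put);
  }
}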
@rishav-rohit
rishav-rohit / UnionMapper.java
Created September 16, 2014 07:23
HBase multi-table input union example mapper
package com.rishav.hbase.union;
import java.util.Arrays;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;
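The TableSplit import is the interesting part: it lets the mapper discover which of the input tables the current split came from. A hedged sketch (the column family/qualifier and the tagging scheme are assumptions):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableSplit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;

public class UnionMapperSketch extends TableMapper<ImmutableBytesWritable, Text> {

  private String tableName;

  @Override
  protected void setup(Context context) {
    // The input split knows which source table it belongs to
    TableSplit split = (TableSplit) context.getInputSplit();
    tableName = Bytes.toString(split.getTableName());
  }

  @Override
  protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
      throws IOException, InterruptedException {
    byte[] cell = result.getValue(Bytes.toBytes("cf"), Bytes.toBytes("val"));
    if (cell != null) {
      // Tag the value with its source table so the reducer can tell them apart
      context.write(rowKey, new Text(tableName + ":" + Bytes.toString(cell)));
    }
  }
}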
@rishav-rohit
rishav-rohit / student_marks.java
Created September 14, 2014 18:42
Avro-generated Java class student_marks.java
/**
* Autogenerated by Avro
*
* DO NOT EDIT DIRECTLY
*/
package com.rishav.avro;
@SuppressWarnings("all")
@org.apache.avro.specific.AvroGenerated
public class student_marks extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"student_marks\",\"namespace\":\"com.rishav.avro\",\"fields\":[{\"name\":\"student_id\",\"type\":\"int\"},{\"name\":\"subject_id\",\"type\":\"int\"},{\"name\":\"marks\",\"type\":\"int\"}]}");
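A quick usage sketch for the generated class: populate a record by field position (0 = student_id, 1 = subject_id, 2 = marks, per SCHEMA$) and append it to an Avro data file; the file name and sample values are arbitrary:

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.specific.SpecificDatumWriter;
import com.rishav.avro.student_marks;

public class StudentMarksWriterSketch {
  public static void main(String[] args) throws IOException {
    student_marks record = new student_marks();
    record.put(0, 1);    // student_id
    record.put(1, 101);  // subject_id
    record.put(2, 85);   // marks

    // Write the record to a container file with the specific-record API
    DataFileWriter<student_marks> writer = new DataFileWriter<student_marks>(
        new SpecificDatumWriter<student_marks>(student_marks.class));
    writer.create(student_marks.SCHEMA$, new File("student_marks.avro"));
    writer.append(record);
    writer.close();
  }
}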
@rishav-rohit
rishav-rohit / gist:064a940c926b66836cf3
Created June 27, 2014 12:49
Update a fixed number of MongoDB records with a batchId
public class UpdateMongoBatchId {
public static void main(String[] args) {
Integer batchId = Integer.valueOf(args[0]);
try {
Mongo mongo = new Mongo("10.x.x.x", 27017);
DB db = mongo.getDB("dbname");
DBCollection coll1 = db.getCollection("collname");
// MongoDB find conditions
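The preview cuts off at the find conditions. A hedged sketch of how the rest plausibly goes with the legacy 2.x driver: select up to N documents that have no batchId yet and stamp them. The batchId field name, the $exists query, and the batch size are assumptions:

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.Mongo;

public class UpdateMongoBatchIdSketch {
  public static void main(String[] args) throws Exception {
    Integer batchId = Integer.valueOf(args[0]);
    Mongo mongo = new Mongo("10.x.x.x", 27017); // placeholder host, as in the gist
    DB db = mongo.getDB("dbname");
    DBCollection coll = db.getCollection("collname");

    // Select documents not yet assigned to any batch, capped at a fixed count
    DBObject query = new BasicDBObject("batchId", new BasicDBObject("$exists", false));
    DBCursor cursor = coll.find(query).limit(1000);
    while (cursor.hasNext()) {
      DBObject doc = cursor.next();
      // $set stamps the batch id without touching the rest of the document
      coll.update(new BasicDBObject("_id", doc.get("_id")),
          new BasicDBObject("$set", new BasicDBObject("batchId", batchId)));
    }
    mongo.close();
  }
}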
package com.rishav.avro.mapreduce;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.hadoop.conf.Configured;
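This preview lost its gist header in the listing, but the imports read like the driver of an Avro MapReduce job over the student_marks records above. A hedged sketch of the wiring those imports imply (the output schemas, paths, and job name are assumptions, and the gist's mapper/reducer classes are not shown):

import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.avro.mapreduce.AvroKeyValueOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.rishav.avro.student_marks;

public class AvroMRDriverSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "avro-mr-sketch");
    job.setJarByClass(AvroMRDriverSketch.class);

    // Input: Avro data files whose records follow the student_marks schema
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, student_marks.SCHEMA$);

    // Output: Avro key/value pairs; these schemas are assumptions
    job.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.INT));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.FLOAT));

    // job.setMapperClass(...) / job.setReducerClass(...) omitted: the gist's
    // mapper and reducer are not visible in the preview
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}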
--1. Create a Hive table stored as textfile
USE test;
CREATE TABLE csv_table (
student_id INT,
subject_id INT,
marks INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
--2. Load csv_table with student.csv data
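The preview ends at step 2; the load itself would be a LOAD DATA statement along these lines (the local path is a placeholder):

LOAD DATA LOCAL INPATH '/path/to/student.csv'
OVERWRITE INTO TABLE csv_table;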
package com.hadoop.imcdp;
import java.io.*;
import java.util.Set;
import java.lang.Integer;
import org.apache.hadoop.io.*;
public class IntPair implements WritableComparable<IntPair>{
private IntWritable first;
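The preview stops at the first field. A hedged sketch of how such an IntPair is usually completed: Hadoop's standard WritableComparable pattern for composite keys (the gist's actual ordering logic may differ):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;

public class IntPairSketch implements WritableComparable<IntPairSketch> {
  private IntWritable first = new IntWritable();
  private IntWritable second = new IntWritable();

  public void set(int left, int right) {
    first.set(left);
    second.set(right);
  }

  public int getFirst() { return first.get(); }
  public int getSecond() { return second.get(); }

  // Serialize both ints in a fixed order so readFields can mirror it
  public void write(DataOutput out) throws IOException {
    first.write(out);
    second.write(out);
  }

  public void readFields(DataInput in) throws IOException {
    first.readFields(in);
    second.readFields(in);
  }

  // Natural order: by first, then by second (the usual secondary-sort setup)
  public int compareTo(IntPairSketch other) {
    int cmp = first.compareTo(other.first);
    return cmp != 0 ? cmp : second.compareTo(other.second);
  }

  public int hashCode() {
    return first.hashCode() * 163 + second.hashCode();
  }

  public boolean equals(Object o) {
    if (!(o instanceof IntPairSketch)) {
      return false;
    }
    IntPairSketch other = (IntPairSketch) o;
    return first.equals(other.first) && second.equals(other.second);
  }
}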