Skip to content

Instantly share code, notes, and snippets.

View krishnakalyan3's full-sized avatar

Krishna Kalyan krishnakalyan3

View GitHub Profile
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class VowelConsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class VowelConsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text letterType,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException{
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
public class VowelConsPartitioner extends Partitioner<Text, IntWritable> {
@Override
public int getPartition(Text letterType, IntWritable count, int redCnt) {
package UDF;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.regex.Matcher;
@krishnakalyan3
krishnakalyan3 / input
Created November 1, 2014 03:34
Input
1
2
3
4
1
1
1
@krishnakalyan3
krishnakalyan3 / Sum
Created November 1, 2014 03:43
Sum Operation
-- Load the input once, up front, so that both examples below can use relation A.
-- Each record is read as a single int field named "number".
A = load 'input' as (number:int);

-- Plain Sum
-- Put every record of A into one group, then total the "number" column of that group.
B = group A all;
C = foreach B generate SUM(A.number);
dump C;

-- Group And Sum
-- Form one group per distinct value of "number", then total the values inside each group.
-- Aliases B and C are re-assigned here, as in the original snippet.
B = group A by number;
C = foreach B generate SUM(A.number);
dump C;
@krishnakalyan3
krishnakalyan3 / Cast
Created November 2, 2014 04:20
pig Cast
-- Read comma-separated records; no schema is declared, so fields are addressed by position.
raw = LOAD 'input' USING PigStorage(',');

-- Project every column from the second one ($1) through the last.
tail_cols = FOREACH raw GENERATE $1..;

-- Cast the first five columns to explicit types.
typed = FOREACH raw GENERATE
    (chararray) $0,
    (int) $1,
    (int) $2,
    (chararray) $3,
    (chararray) $4;
DUMP typed;
@krishnakalyan3
krishnakalyan3 / HBase
Created November 2, 2014 04:34
Table with Sno|Name|Age
1|Krishna|23
2|Madhuri|37
3|Kalyan|54
4|Shobhana|50