Skip to content

Instantly share code, notes, and snippets.

@samklr
Last active December 17, 2015 18:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save samklr/5652693 to your computer and use it in GitHub Desktop.
Save samklr/5652693 to your computer and use it in GitHub Desktop.
Hadoop MR Count Word
/**
 * Mapper for the word-count job: splits each input line into
 * whitespace-delimited tokens and emits a {@code (token, 1)} pair per token.
 */
public class CountWordsMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
  /** Constant count emitted for every token; shared to avoid one allocation per token. */
  private static final IntWritable ONE = new IntWritable(1);

  /** Reusable output key; overwritten before each write instead of allocating a new Text. */
  private final Text word = new Text();

  /**
   * Tokenizes one input record and writes {@code (token, 1)} for each token.
   *
   * @param key     input key supplied by the framework; not read by this mapper
   * @param value   the text of the input record to tokenize
   * @param context sink that receives the emitted pairs
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if the write is interrupted
   */
  @Override
  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // StringTokenizer with no explicit delimiters splits on its default whitespace set.
    StringTokenizer tokens = new StringTokenizer(value.toString());
    while (tokens.hasMoreTokens()) {
      // Reuse the same Writable instances for each record, following the
      // pattern used in the Apache Hadoop WordCount tutorial.
      word.set(tokens.nextToken());
      context.write(word, ONE);
    }
  }
}
/**
 * Reducer for the word-count job: adds up all counts received for a key and
 * emits a single {@code (key, total)} pair.
 */
public class CountWordsReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
  /**
   * Sums the values grouped under {@code key} and writes the total.
   *
   * @param key     the key whose values are being combined
   * @param values  the counts associated with {@code key}
   * @param context sink that receives the {@code (key, total)} pair
   * @throws IOException          if writing to the context fails
   * @throws InterruptedException if the write is interrupted
   */
  @Override
  public void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    IntWritable total = new IntWritable(sumOf(values));
    context.write(key, total);
  }

  /** Returns the plain int sum of every count in {@code counts}. */
  private static int sumOf(Iterable<IntWritable> counts) {
    int running = 0;
    for (IntWritable count : counts) {
      running = running + count.get();
    }
    return running;
  }
}
/**
 * Configures and runs the word-count job, then exits the JVM with the job's status.
 *
 * <p>Exits with -1 after printing a usage message when the argument count is
 * not exactly two; otherwise exits with 0 if the job completes successfully
 * and 1 if it does not.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
  if (args.length != 2) {
    System.err.println("Usage: CountWords <input path> <output path>");
    System.exit(-1);
  }

  // NOTE(review): newer Hadoop releases deprecate this constructor in favour of
  // Job.getInstance(); confirm the target Hadoop version before switching.
  Job job = new Job();
  job.setJarByClass(CountWords.class);
  job.setJobName("Count Words");

  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  job.setMapperClass(CountWordsMapper.class);
  // The reducer is a plain sum (associative and commutative) whose input and
  // output types match, so it can also run as a combiner to pre-aggregate
  // counts on the map side before the shuffle. Final output is unchanged.
  job.setCombinerClass(CountWordsReducer.class);
  job.setReducerClass(CountWordsReducer.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  // waitForCompletion(true) blocks until the job finishes; its boolean result
  // is mapped to the process exit code.
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment