public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> { private static final IntWritable ONE = new IntWritable(1); private static final Text BIGRAM = new Text(); public void map(Object key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); String prev = null; StringTokenizer itr = new StringTokenizer(line); while (itr.hasMoreTokens()) { String cur = itr.nextToken(); // Emit only if we have an actual bigram. if (prev != null) { BIGRAM.set(prev + " " + cur); context.write(BIGRAM, ONE); } prev = cur; } } }