Skip to content

Instantly share code, notes, and snippets.

Created August 26, 2012 06:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/3475214 to your computer and use it in GitHub Desktop.
Save anonymous/3475214 to your computer and use it in GitHub Desktop.
Data-Intensive Text Processing with MapReduce #3, Figure 3.3 Mapper
package info.moaikids.mapred.map;
import info.moaikids.chunker.Chunker;
import info.moaikids.chunker.KuromojiChunker;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that counts chunk occurrences with in-mapper combining.
 *
 * <p>Instead of emitting {@code (chunk, 1)} for every chunk, counts are
 * accumulated in an in-memory map across all {@link #map} calls made on this
 * instance and emitted once, as {@code (chunk, count)} pairs, from
 * {@link #cleanup}.
 *
 * <p>NOTE(review): the map is never flushed before {@code cleanup}, so its size
 * grows with the number of distinct chunks seen by this mapper instance.
 */
public class Figure33Mapper extends
        Mapper<LongWritable, Text, Text, IntWritable> {

    /** Running count per chunk; (re)created in {@link #setup}. */
    Map<String, Integer> associativeArray;

    /** Splits each input value into the chunks that are counted. */
    Chunker chunker = new KuromojiChunker();

    /**
     * Creates a fresh, empty count map for this mapper instance.
     *
     * @param context the task context supplied by the framework
     * @throws IOException          propagated from the superclass
     * @throws InterruptedException propagated from the superclass
     */
    @Override
    protected void setup(Context context) throws IOException,
            InterruptedException {
        super.setup(context);
        associativeArray = new HashMap<String, Integer>();
    }

    /**
     * Chunks one input value and adds each non-blank chunk to the running
     * counts. Nothing is written to {@code context} here; output is deferred
     * to {@link #cleanup}.
     *
     * @param key     the input key (unused)
     * @param value   the input text to split into chunks
     * @param context the task context (unused in this method)
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String chunk : chunker.chunking(value.toString())) {
            // Skip chunks that are empty or consist only of whitespace.
            if (chunk.trim().isEmpty()) {
                continue;
            }
            // Single lookup: stored values are never null, so a null result
            // means the chunk has not been seen yet.
            Integer seen = associativeArray.get(chunk);
            associativeArray.put(chunk, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Emits one {@code (chunk, count)} pair for every entry accumulated by
     * {@link #map}.
     *
     * @param context the task context that receives the output pairs
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if writing an output pair is interrupted
     */
    @Override
    protected void cleanup(Context context) throws IOException,
            InterruptedException {
        // Reuse one pair of Writables rather than allocating two objects per
        // entry. NOTE(review): this relies on context.write() not retaining
        // references to the objects it is given - confirm if a non-standard
        // output path is in use.
        Text outKey = new Text();
        IntWritable outValue = new IntWritable();
        for (Entry<String, Integer> entry : associativeArray.entrySet()) {
            outKey.set(entry.getKey());
            outValue.set(entry.getValue());
            context.write(outKey, outValue);
        }
        super.cleanup(context);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment