Skip to content

Instantly share code, notes, and snippets.

Created August 26, 2012 07:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/3475607 to your computer and use it in GitHub Desktop.
Save anonymous/3475607 to your computer and use it in GitHub Desktop.
Data Intensive Text Processing with MapReduce #3 figure3.8 Mapper
package info.moaikids.mapred.map;
import info.moaikids.chunker.Chunker;
import info.moaikids.chunker.KuromojiChunker;
import java.io.IOException;
import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that emits chunk co-occurrence pairs.
 *
 * <p>Each input value is split into sentences on the ideographic full stop
 * ("。") or a newline. ASCII spaces are removed from every sentence, the
 * sentence is handed to the {@link Chunker}, and for every two chunk positions
 * {@code i < j} the key {@code chunks[i] + " " + chunks[j]} is written with a
 * count of one. Sentences that yield fewer than two chunks produce no output.
 */
public class Figure38Mapper extends
        Mapper<LongWritable, Text, Text, IntWritable> {

    /** Sentence delimiter, compiled once instead of on every {@code map} call. */
    private static final Pattern SENTENCE_DELIMITER = Pattern.compile("。|\n");

    /** Count written alongside every emitted pair. */
    static final IntWritable ONE = new IntWritable(1);

    /** Breaks a sentence into the chunks that are paired up. */
    Chunker chunker = new KuromojiChunker();

    /**
     * Output key reused across writes, so that a new {@code Text} is not
     * allocated for every emitted pair.
     */
    private final Text pairKey = new Text();

    /**
     * Emits one {@code (chunk_i + " " + chunk_j, 1)} record for every pair of
     * chunk positions {@code i < j} in each sentence of the input value.
     *
     * @param key     input key; not used
     * @param value   text to split into sentences and chunk
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        for (String sentence : SENTENCE_DELIMITER.split(value.toString())) {
            // Literal replacement: no regex is needed to drop plain spaces.
            String cleaned = sentence.replace(" ", "").trim();
            if (cleaned.isEmpty()) {
                // Nothing to pair in an empty sentence; skip the chunker call.
                continue;
            }

            String[] chunks = chunker.chunking(cleaned);
            // Starting j at i + 1 visits exactly the i < j pairs; with fewer
            // than two chunks the inner loop never runs.
            for (int i = 0; i < chunks.length - 1; i++) {
                for (int j = i + 1; j < chunks.length; j++) {
                    pairKey.set(chunks[i] + " " + chunks[j]);
                    context.write(pairKey, ONE);
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment