Skip to content

Instantly share code, notes, and snippets.

Created August 26, 2012 07:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/3475934 to your computer and use it in GitHub Desktop.
Save anonymous/3475934 to your computer and use it in GitHub Desktop.
Data Intensive Text Processing with MapReduce #3 figure3.9.x Reducer
package info.moaikids.mapred.reduce;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that merges the {@link MapWritable} values received for a key and
 * emits, for each inner entry, that entry's share of the key's overall count.
 *
 * <p>Every inner map entry is cast to a {@link Text} key and an
 * {@link IntWritable} value; any other type results in a
 * {@link ClassCastException}. Counts are summed per inner key and into a
 * grand total for the reduce key. For each inner key whose summed count is
 * strictly greater than {@link #MIN}, one record is written:
 * the output key is {@code "<reduce key> <inner key>"} and the output value
 * is {@code summed count / grand total}.
 *
 * <p>The grand total includes the counts of entries that are later filtered
 * out by {@link #MIN}, so the emitted values for one reduce key do not
 * necessarily add up to 1.
 */
public class Figure39xReducer extends
        Reducer<Text, MapWritable, Text, DoubleWritable> {

    /** Exclusive threshold: only summed counts strictly greater than this are emitted. */
    static final int MIN = 1;

    /**
     * Aggregates all maps for {@code key} and writes the relative frequency of
     * each sufficiently frequent inner key.
     *
     * @param key     the reduce key; its string form prefixes every output key
     * @param values  maps of {@code Text -> IntWritable} counts to be merged
     * @param context the Hadoop context the results are written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<MapWritable> values,
            Context context) throws IOException, InterruptedException {
        // Accumulate in 64 bits: the individual counts are ints, but their
        // sums can exceed Integer.MAX_VALUE, and int addition wraps silently.
        Map<String, Long> counts = new HashMap<String, Long>();
        long total = 0L;

        for (MapWritable value : values) {
            for (Entry<Writable, Writable> entry : value.entrySet()) {
                String text = ((Text) entry.getKey()).toString();
                int count = ((IntWritable) entry.getValue()).get();
                total += count;

                // Single lookup; a missing entry starts from the current count.
                Long previous = counts.get(text);
                long sum = (previous == null) ? count : previous.longValue() + count;
                counts.put(text, sum);
            }
        }

        // The key prefix is identical for every record emitted by this call.
        String prefix = key.toString() + " ";
        for (Entry<String, Long> entry : counts.entrySet()) {
            long count = entry.getValue();
            if (count > MIN) {
                context.write(new Text(prefix + entry.getKey()),
                        new DoubleWritable(((double) count) / total));
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment