Skip to content

Instantly share code, notes, and snippets.

Created August 27, 2012 07:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/3486554 to your computer and use it in GitHub Desktop.
Save anonymous/3486554 to your computer and use it in GitHub Desktop.
Data Intensive Text Processing with MapReduce #3 SecondarySort (Reduce Side) Reducer
package info.moaikids.mapred.reduce;
import java.io.IOException;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that, for a single reduce key, tallies how many of the incoming
 * values share the same leading tab-separated field and then emits one
 * record per distinct field.
 *
 * <p>Records are emitted in the natural {@link String} ordering of the
 * leading field, because the tallies are kept in a {@link TreeMap}.
 */
public class ReduceSecondarySortReducer extends
        Reducer<Text, Text, Text, IntWritable> {

    /**
     * Counts occurrences of the first tab-separated field of every value
     * and writes {@code "<key> <field>"} paired with its count.
     *
     * <p>Values that are empty after trimming are skipped. The field itself
     * is trimmed before it is counted.
     *
     * @param key     the reduce key; its string form prefixes every output key
     * @param values  tab-separated text values grouped under {@code key}
     * @param context the Hadoop context the output records are written to
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Sorted map: iteration below yields fields in ascending String order.
        TreeMap<String, Integer> tally = new TreeMap<String, Integer>();

        for (Text value : values) {
            String line = value.toString().trim();
            if (!line.isEmpty()) {
                // The trimmed line cannot start with a tab, so everything
                // before the first tab (or the whole line) is the first field.
                int tab = line.indexOf('\t');
                String head = tab < 0 ? line : line.substring(0, tab);
                String field = head.trim();

                Integer seen = tally.get(field);
                tally.put(field, seen == null ? 1 : seen + 1);
            }
        }

        // Read the key only after all values have been consumed, matching the
        // point at which the original code evaluated key.toString().
        String prefix = key.toString() + " ";
        for (Entry<String, Integer> counted : tally.entrySet()) {
            Text outKey = new Text(prefix + counted.getKey());
            IntWritable outValue = new IntWritable(counted.getValue());
            context.write(outKey, outValue);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment