Skip to content

Instantly share code, notes, and snippets.

@omalley
Created February 8, 2012 16:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save omalley/1770850 to your computer and use it in GitHub Desktop.
Save omalley/1770850 to your computer and use it in GitHub Desktop.
A patch that makes Hadoop's sort example support textual sorts
diff --git src/examples/org/apache/hadoop/examples/Sort.java src/examples/org/apache/hadoop/examples/Sort.java
index a028009..40c7647 100644
--- src/examples/org/apache/hadoop/examples/Sort.java
+++ src/examples/org/apache/hadoop/examples/Sort.java
@@ -20,13 +20,18 @@ package org.apache.hadoop.examples;
import java.io.IOException;
import java.net.URI;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.*;
@@ -66,6 +71,23 @@ public class Sort<K,V> extends Configured implements Tool {
}
/**
+ * Mapper for textual sorts: takes the (LongWritable, Text) records read via
+ * TextInputFormat and emits each Text value as the key so lines can be sorted.
+ */
+ public static class TextMapper
+ extends MapReduceBase
+ implements Mapper<LongWritable, Text, Text, NullWritable> {
+
+ private static final NullWritable NIL = NullWritable.get(); // value emitted with every key
+
+ public void map(LongWritable key, Text value, // key is never read below
+ OutputCollector<Text, NullWritable> output,
+ Reporter reporter) throws IOException {
+ output.collect(value, NIL); // the input value becomes the output key
+ }
+ }
+
+ /**
* The main driver for sort program.
* Invoke this method to submit the map/reduce job.
* @throws IOException When there is communication problems with the
@@ -101,6 +123,13 @@ public class Sort<K,V> extends Configured implements Tool {
jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
} else if ("-r".equals(args[i])) {
num_reduces = Integer.parseInt(args[++i]);
+ } else if ("-text".equals(args[i])) {
+ // set the types to get a textual sort of the input
+ inputFormatClass = TextInputFormat.class;
+ jobConf.setMapperClass(TextMapper.class);
+ outputKeyClass = Text.class;
+ outputValueClass = NullWritable.class;
+ outputFormatClass = TextOutputFormat.class;
} else if ("-inFormat".equals(args[i])) {
inputFormatClass =
Class.forName(args[++i]).asSubclass(InputFormat.class);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment