public
Last active

A patch that makes Hadoop's sort example support textual sorts

  • Download Gist
Sort.java
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
diff --git src/examples/org/apache/hadoop/examples/Sort.java src/examples/org/apache/hadoop/examples/Sort.java
index a028009..40c7647 100644
--- src/examples/org/apache/hadoop/examples/Sort.java
+++ src/examples/org/apache/hadoop/examples/Sort.java
@@ -20,13 +20,18 @@ package org.apache.hadoop.examples;
import java.io.IOException;
import java.net.URI;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.*;
@@ -66,6 +71,23 @@ public class Sort<K,V> extends Configured implements Tool {
}
/**
+ * This mapper converts the output of the TextInputFormat into a sortable
+ * list of lines.
+ */
+ public static class TextMapper
+ extends MapReduceBase
+ implements Mapper<LongWritable, Text, Text, NullWritable> { // input pair: (LongWritable, Text); output pair: (Text, NullWritable)
+
+ private static final NullWritable NIL = NullWritable.get(); // fetched once and reused as the value of every emitted pair
+
+ public void map(LongWritable key, Text value,
+ OutputCollector<Text, NullWritable> output,
+ Reporter reporter) throws IOException {
+ output.collect(value, NIL); // the input value becomes the output key; the input key and the reporter are not used
+ }
+ }
+
+ /**
* The main driver for sort program.
* Invoke this method to submit the map/reduce job.
* @throws IOException When there is communication problems with the
@@ -101,6 +123,13 @@ public class Sort<K,V> extends Configured implements Tool {
jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
} else if ("-r".equals(args[i])) {
num_reduces = Integer.parseInt(args[++i]);
+ } else if ("-text".equals(args[i])) {
+ // set the types to get a textual sort of the input
+ inputFormatClass = TextInputFormat.class;
+ jobConf.setMapperClass(TextMapper.class);
+ outputKeyClass = Text.class;
+ outputValueClass = NullWritable.class;
+ outputFormatClass = TextOutputFormat.class;
} else if ("-inFormat".equals(args[i])) {
inputFormatClass =
Class.forName(args[++i]).asSubclass(InputFormat.class);

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.