Skip to content

Instantly share code, notes, and snippets.

@woodywang
Last active December 10, 2015 00:29
Show Gist options
  • Save woodywang/4351448 to your computer and use it in GitHub Desktop.
Save woodywang/4351448 to your computer and use it in GitHub Desktop.
package cn.com.admaster.hadoop.compress;
import com.hadoop.compression.lzo.LzopCodec;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.IOException;
/**
 * MapReduce driver that copies the non-blank lines of a text input into
 * LZOP-compressed text output.
 *
 * <p>Usage: {@code Lzo <input path> <output path>}
 *
 * <p>The job is configured with a single reduce task, so every surviving line
 * passes through one reducer.
 *
 * @author woody
 * @since 12/21/12
 */
public class Lzo {

    /**
     * Configures the compression job, submits it and blocks until it finishes.
     *
     * <p>Terminates the JVM with status {@code -1} when fewer than two
     * arguments are supplied, {@code 0} when the job succeeds and {@code 1}
     * when it fails.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if the job cannot be configured or executed
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: Lzo <input path> <output path>");
            System.exit(-1);
        }

        Job job = new Job();

        // Job identity.
        job.setJarByClass(Lzo.class);
        job.setJobName("Lzo Compression: " + args[1]);

        // Map side: lines are forwarded under the key handed in by the input format.
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(LzoMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);

        // Reduce side: one reduce task handles all records.
        job.setNumReduceTasks(1);
        job.setReducerClass(LzoReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Input / output locations and LZOP compression of the output.
        Path inputPath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inputPath);
        Path outputPath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, LzopCodec.class);

        boolean succeeded = job.waitForCompletion(true);
        if (succeeded) {
            System.exit(0);
        } else {
            System.exit(1);
        }
    }

    /**
     * Forwards each input line unchanged under its incoming key, dropping
     * lines that are empty once surrounding whitespace is trimmed.
     */
    static class LzoMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

        /**
         * Emits {@code (key, value)} unless the line is blank.
         *
         * @param key     key supplied by the input format
         * @param value   the line of text; written out untrimmed
         * @param context sink for the map output
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            boolean blank = value.toString().trim().isEmpty();
            if (blank) {
                return;
            }
            context.write(key, value);
        }
    }

    /**
     * Writes every value it receives with a {@code null} key, so only the
     * line itself is handed to the output format.
     */
    static class LzoReducer extends Reducer<LongWritable, Text, Text, Text> {

        /**
         * Emits each value in {@code values} with a {@code null} key.
         *
         * @param key     the grouping key; not written to the output
         * @param values  the lines grouped under {@code key}
         * @param context sink for the reduce output
         */
        @Override
        public void reduce(LongWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (final Text line : values) {
                context.write(null, line);
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment