Skip to content

Instantly share code, notes, and snippets.

@Andromelus
Created December 2, 2019 13:32
Show Gist options
  • Save Andromelus/fe4993046f8971ce87fb20e3fd85d530 to your computer and use it in GitHub Desktop.
Save Andromelus/fe4993046f8971ce87fb20e3fd85d530 to your computer and use it in GitHub Desktop.
package sdz.hadoop.wordcount;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.Reducer;
import java.util.StringTokenizer;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class WordCountDriver extends Configured implements Tool {
public int run(String[] args) throws Exception {
if (args.length != 2) {
System.out.println("Usage: [input] [output]");
System.exit(-1);
}
Job job = Job.getInstance(getConf());
job.setJobName("wordcount");
job.setJarByClass(WordCountDriver.class);
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
Path inputFilePath = new Path(args[0]);
Path outputFilePath = new Path(args[1]);
FileInputFormat.setInputDirRecursive(job, true);
FileInputFormat.addInputPath(job, inputFilePath);
FileOutputFormat.setOutputPath(job, outputFilePath);
FileSystem fs = FileSystem.newInstance(getConf());
if (fs.exists(outputFilePath)) {
fs.delete(outputFilePath, true);
}
return job.waitForCompletion(true) ? 0 : 1;
}
public static void main(String[] args) throws Exception {
WordCountDriver wordcountDriver = new WordCountDriver();
int res = ToolRunner.run(wordcountDriver, args);
System.exit(res);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment