Created
June 1, 2018 04:00
-
-
Save AhianZhang/bee6fce952169f52ad374fe24699b789 to your computer and use it in GitHub Desktop.
Hadoop MapReduce WordCount
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.hadoop.conf.Configuration; | |
import org.apache.hadoop.fs.FileSystem; | |
import org.apache.hadoop.fs.Path; | |
import org.apache.hadoop.io.LongWritable; | |
import org.apache.hadoop.io.Text; | |
import org.apache.hadoop.mapreduce.Job; | |
import org.apache.hadoop.mapreduce.Mapper; | |
import org.apache.hadoop.mapreduce.Reducer; | |
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; | |
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; | |
import java.io.IOException; | |
/** | |
* Created by AhianZhang on 2018/5/11. | |
*/ | |
public class WordCount | |
{ | |
public static class MyMapepr extends Mapper<LongWritable, Text, Text, LongWritable> | |
{ | |
LongWritable one = new LongWritable(1); | |
@Override | |
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException | |
{ | |
//接收到每一行数据 | |
String line = value.toString(); | |
//按照指定分隔符进行拆分 | |
String[] words = line.split(" "); | |
for (String word : words) | |
{ | |
context.write(new Text(word), one); | |
} | |
} | |
} | |
public static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable> | |
{ | |
@Override | |
protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException | |
{ | |
long sum = 0; | |
for (LongWritable value : values) | |
{ | |
sum += value.get(); | |
} | |
context.write(key, new LongWritable(sum)); | |
} | |
} | |
public static void main(String[] args) throws Exception | |
{ | |
Configuration configuration = new Configuration(); | |
Path outputPath = new Path(args[1]); | |
FileSystem fileSystem = FileSystem.get(configuration); | |
if (fileSystem.exists(outputPath)) | |
{ | |
fileSystem.delete(outputPath, true); | |
System.out.println("output file is already exitst but was deleted"); | |
} | |
Job job = Job.getInstance(configuration, "wordcount"); | |
job.setJarByClass(WordCount.class); | |
FileInputFormat.setInputPaths(job, new Path(args[0])); | |
job.setMapperClass(MyMapepr.class); | |
job.setMapOutputKeyClass(Text.class); | |
job.setOutputValueClass(LongWritable.class); | |
job.setReducerClass(MyReducer.class); | |
job.setOutputKeyClass(Text.class); | |
job.setMapOutputValueClass(LongWritable.class); | |
FileOutputFormat.setOutputPath(job, new Path(args[1])); | |
System.exit(job.waitForCompletion(true) ? 0 : 1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment