Created December 16, 2013 20:46
WordCount
WordCount.java
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class WordCount {

  public static void main(String[] args) {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(WordCount.class);

    // specify output types (used here for both map and reduce output)
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    // specify input and output dirs
    FileInputFormat.setInputPaths(conf, new Path("input"));
    FileOutputFormat.setOutputPath(conf, new Path("output"));

    // specify a mapper
    conf.setMapperClass(WordCountMapper.class);

    // specify a reducer; the same class also serves as a combiner,
    // so partial counts are summed on the map side before the shuffle
    conf.setReducerClass(WordCountReducer.class);
    conf.setCombinerClass(WordCountReducer.class);

    client.setConf(conf);
    try {
      JobClient.runJob(conf);
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}
WordCountMapper.java
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class WordCountMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  private final IntWritable one = new IntWritable(1);
  private final Text word = new Text();

  // key is the byte offset of the line in the input; value is the line itself
  public void map(LongWritable key, Text value,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    String line = value.toString();
    StringTokenizer itr = new StringTokenizer(line.toLowerCase());
    // emit (word, 1) for every whitespace-separated token
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      output.collect(word, one);
    }
  }
}
WordCountReducer.java
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WordCountReducer extends MapReduceBase
    implements Reducer<Text, IntWritable, Text, IntWritable> {

  // values holds every count emitted for this word: 1s from the mapper,
  // or partial sums if the combiner already ran
  public void reduce(Text key, Iterator<IntWritable> values,
      OutputCollector<Text, IntWritable> output, Reporter reporter)
      throws IOException {
    int sum = 0;
    while (values.hasNext()) {
      sum += values.next().get();
    }
    output.collect(key, new IntWritable(sum));
  }
}
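
One way to try the job (a rough sketch, not part of the original gist): compile the three classes against the Hadoop libraries, package them into a jar, and run the driver with the hadoop command. The jar name below is an assumption, and the hard-coded "input" directory must already exist (and "output" must not) in the filesystem the job runs against.

    javac -classpath "$(hadoop classpath)" WordCount.java WordCountMapper.java WordCountReducer.java
    jar cf wordcount.jar *.class
    hadoop jar wordcount.jar WordCount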