m-manu/WordCounter.java

## pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Build descriptor for the hadoop-sandbox module (Hadoop MapReduce demo code). -->
    <modelVersion>4.0.0</modelVersion>
    <groupId>manu.sandbox</groupId>
    <artifactId>hadoop-sandbox</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <!-- Vendor-suffixed Hadoop version shared by every org.apache.hadoop dependency below. -->
        <hadoop.version>2.7.1.2.4.0.0-169</hadoop.version>
        <!-- NOTE(review): hbase.version is not referenced by any dependency in this POM. -->
        <hbase.version>1.1.2.2.4.0.0-169</hbase.version>
    </properties>
    <repositories>
        <repository>
            <id>HDPReleases</id>
            <name>HDP Releases</name>
            <!-- HTTPS on purpose: Maven 3.8.1+ blocks external plain-HTTP repositories,
                 and HTTP downloads can be tampered with in transit. -->
            <url>https://repo.hortonworks.com/content/repositories/releases/</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>manu.sandbox</groupId>
            <artifactId>java-sandbox</artifactId>
            <version>1.0-SNAPSHOT</version>
        </dependency>
        <!-- Compile-time Hadoop APIs (Configuration, FileSystem, Writable types, MapReduce). -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- Test-scoped Hadoop artifacts; the test-jar entries differ from the regular
             jars by type, so they are not duplicate declarations. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <type>test-jar</type>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <!-- Java 7 language level; all javac lint warnings enabled. -->
                    <source>1.7</source>
                    <target>1.7</target>
                    <compilerArgument>-Xlint:all</compilerArgument>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

## WordCounter.java
package manu.sandbox.demos.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.StringTokenizer;

/**
 * Minimal Hadoop MapReduce word-count job.
 *
 * <p>{@link #run(String, String, String)} configures and executes the job;
 * the nested mapper and reducer hold the counting logic.
 */
public class WordCounter {
    /**
     * Emits {@code (token, 1)} for every whitespace-separated token of an input line.
     */
    private static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        // Reused for every emitted token instead of allocating a Text per word.
        private final Text word = new Text();

        /**
         * @param n       key supplied by the input format (not used here)
         * @param t       one line of input text
         * @param context sink for the emitted {@code (word, 1)} pairs
         */
        @Override
        public void map(LongWritable n, Text t, Context context) throws IOException, InterruptedException {
            // Default delimiters (space, tab, newline, CR, form feed): splitting on ' ' alone
            // would turn tab-separated words into a single key that itself contains a tab.
            StringTokenizer tokenizer = new StringTokenizer(t.toString());
            while (tokenizer.hasMoreTokens()) {
                word.set(tokenizer.nextToken());
                context.write(word, ONE);
            }
        }
    }

    /**
     * Sums the partial counts received for a word and emits {@code (word, total)}.
     */
    private static class WordReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        // Reused output holder, mirroring how the mapper reuses its Text instance.
        private final IntWritable total = new IntWritable();

        /**
         * @param word    the word being reduced
         * @param counts  partial counts emitted for {@code word}
         * @param context sink for the single {@code (word, total)} pair
         */
        @Override
        public void reduce(Text word, Iterable<IntWritable> counts, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable count : counts) {
                sum += count.get();
            }
            total.set(sum);
            context.write(word, total);
        }
    }

    /**
     * Runs the word-count job and blocks until it finishes.
     *
     * <p>Any existing content at {@code output} is deleted recursively before the job
     * starts. This method never throws: failures are reported on {@code System.err}.
     *
     * @param input   path of the input file or directory
     * @param output  path of the output directory (removed first if it already exists)
     * @param jobName name given to the job
     */
    public static void run(String input, String output, String jobName) {
        try {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, jobName);
            Path inputPath = new Path(input);
            Path outputPath = new Path(output);
            FileInputFormat.setInputPaths(job, inputPath);
            FileOutputFormat.setOutputPath(job, outputPath);
            // Resolve the file system from the output path itself so the delete targets
            // the same file system the job writes to, not unconditionally the local disk.
            FileSystem fs = outputPath.getFileSystem(conf);
            fs.delete(outputPath, true);
            job.setJarByClass(WordCounter.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            job.setMapperClass(WordMapper.class);
            job.setReducerClass(WordReducer.class);
            // waitForCompletion returns false when the job did not succeed; surface that
            // instead of returning as if nothing had gone wrong.
            if (!job.waitForCompletion(false)) {
                System.err.println("Job '" + jobName + "' failed");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            System.err.println("Exception thrown");
            e.printStackTrace();
        } catch (Exception e) {
            System.err.println("Exception thrown");
            e.printStackTrace();
        }
    }
}

## WordCounterTest.java
package manu.sandbox.demos.hadoop;

/**
 * Command-line launcher for the word-count job.
 */
public class WordCounterTest {

    /**
     * Forwards exactly three command-line arguments to {@code WordCounter.run};
     * with any other argument count an error line is printed to {@code System.err}.
     *
     * @param args input path, output path and job name, in that order
     */
    public static void main(String[] args) {
        final int expectedArgCount = 3;
        if (args.length != expectedArgCount) {
            System.err.println("Invalid number of arguments");
            return;
        }

        final String input = args[0];
        final String output = args[1];
        final String jobName = args[2];
        WordCounter.run(input, output, jobName);
    }
}
	<?xml version="1.0" encoding="UTF-8"?>
	<project xmlns="http://maven.apache.org/POM/4.0.0"
	         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	    <!-- Build descriptor for the hadoop-sandbox module (Hadoop MapReduce demo code). -->
	    <modelVersion>4.0.0</modelVersion>
	    <groupId>manu.sandbox</groupId>
	    <artifactId>hadoop-sandbox</artifactId>
	    <version>1.0-SNAPSHOT</version>
	    <properties>
	        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	        <!-- Vendor-suffixed Hadoop version shared by every org.apache.hadoop dependency below. -->
	        <hadoop.version>2.7.1.2.4.0.0-169</hadoop.version>
	        <!-- NOTE(review): hbase.version is not referenced by any dependency in this POM. -->
	        <hbase.version>1.1.2.2.4.0.0-169</hbase.version>
	    </properties>
	    <repositories>
	        <repository>
	            <id>HDPReleases</id>
	            <name>HDP Releases</name>
	            <!-- HTTPS on purpose: Maven 3.8.1+ blocks external plain-HTTP repositories,
	                 and HTTP downloads can be tampered with in transit. -->
	            <url>https://repo.hortonworks.com/content/repositories/releases/</url>
	        </repository>
	    </repositories>
	    <dependencies>
	        <dependency>
	            <groupId>manu.sandbox</groupId>
	            <artifactId>java-sandbox</artifactId>
	            <version>1.0-SNAPSHOT</version>
	        </dependency>
	        <!-- Compile-time Hadoop APIs (Configuration, FileSystem, Writable types, MapReduce). -->
	        <dependency>
	            <groupId>org.apache.hadoop</groupId>
	            <artifactId>hadoop-common</artifactId>
	            <version>${hadoop.version}</version>
	        </dependency>
	        <dependency>
	            <groupId>org.apache.hadoop</groupId>
	            <artifactId>hadoop-mapreduce-client-core</artifactId>
	            <version>${hadoop.version}</version>
	        </dependency>
	        <!-- Test-scoped Hadoop artifacts; the test-jar entries differ from the regular
	             jars by type, so they are not duplicate declarations. -->
	        <dependency>
	            <groupId>org.apache.hadoop</groupId>
	            <artifactId>hadoop-common</artifactId>
	            <version>${hadoop.version}</version>
	            <type>test-jar</type>
	            <scope>test</scope>
	        </dependency>
	        <dependency>
	            <groupId>org.apache.hadoop</groupId>
	            <artifactId>hadoop-hdfs</artifactId>
	            <version>${hadoop.version}</version>
	            <type>test-jar</type>
	            <scope>test</scope>
	        </dependency>
	        <dependency>
	            <groupId>org.apache.hadoop</groupId>
	            <artifactId>hadoop-hdfs</artifactId>
	            <version>${hadoop.version}</version>
	            <scope>test</scope>
	        </dependency>
	    </dependencies>
	    <build>
	        <plugins>
	            <plugin>
	                <artifactId>maven-compiler-plugin</artifactId>
	                <version>3.3</version>
	                <configuration>
	                    <!-- Java 7 language level; all javac lint warnings enabled. -->
	                    <source>1.7</source>
	                    <target>1.7</target>
	                    <compilerArgument>-Xlint:all</compilerArgument>
	                </configuration>
	            </plugin>
	        </plugins>
	    </build>
	</project>
	package manu.sandbox.demos.hadoop;

	import org.apache.hadoop.conf.Configuration;
	import org.apache.hadoop.fs.FileSystem;
	import org.apache.hadoop.fs.Path;
	import org.apache.hadoop.io.IntWritable;
	import org.apache.hadoop.io.LongWritable;
	import org.apache.hadoop.io.Text;
	import org.apache.hadoop.mapreduce.Job;
	import org.apache.hadoop.mapreduce.Mapper;
	import org.apache.hadoop.mapreduce.Reducer;
	import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
	import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

	import java.io.IOException;
	import java.util.StringTokenizer;

	/**
	 * Minimal Hadoop MapReduce word-count job.
	 *
	 * <p>{@link #run(String, String, String)} configures and executes the job;
	 * the nested mapper and reducer hold the counting logic.
	 */
	public class WordCounter {
	    /**
	     * Emits {@code (token, 1)} for every whitespace-separated token of an input line.
	     */
	    private static class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
	        private static final IntWritable ONE = new IntWritable(1);
	        // Reused for every emitted token instead of allocating a Text per word.
	        private final Text word = new Text();

	        /**
	         * @param n       key supplied by the input format (not used here)
	         * @param t       one line of input text
	         * @param context sink for the emitted {@code (word, 1)} pairs
	         */
	        @Override
	        public void map(LongWritable n, Text t, Context context) throws IOException, InterruptedException {
	            // Default delimiters (space, tab, newline, CR, form feed): splitting on ' ' alone
	            // would turn tab-separated words into a single key that itself contains a tab.
	            StringTokenizer tokenizer = new StringTokenizer(t.toString());
	            while (tokenizer.hasMoreTokens()) {
	                word.set(tokenizer.nextToken());
	                context.write(word, ONE);
	            }
	        }
	    }

	    /**
	     * Sums the partial counts received for a word and emits {@code (word, total)}.
	     */
	    private static class WordReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
	        // Reused output holder, mirroring how the mapper reuses its Text instance.
	        private final IntWritable total = new IntWritable();

	        /**
	         * @param word    the word being reduced
	         * @param counts  partial counts emitted for {@code word}
	         * @param context sink for the single {@code (word, total)} pair
	         */
	        @Override
	        public void reduce(Text word, Iterable<IntWritable> counts, Context context) throws IOException, InterruptedException {
	            int sum = 0;
	            for (IntWritable count : counts) {
	                sum += count.get();
	            }
	            total.set(sum);
	            context.write(word, total);
	        }
	    }

	    /**
	     * Runs the word-count job and blocks until it finishes.
	     *
	     * <p>Any existing content at {@code output} is deleted recursively before the job
	     * starts. This method never throws: failures are reported on {@code System.err}.
	     *
	     * @param input   path of the input file or directory
	     * @param output  path of the output directory (removed first if it already exists)
	     * @param jobName name given to the job
	     */
	    public static void run(String input, String output, String jobName) {
	        try {
	            Configuration conf = new Configuration();
	            Job job = Job.getInstance(conf, jobName);
	            Path inputPath = new Path(input);
	            Path outputPath = new Path(output);
	            FileInputFormat.setInputPaths(job, inputPath);
	            FileOutputFormat.setOutputPath(job, outputPath);
	            // Resolve the file system from the output path itself so the delete targets
	            // the same file system the job writes to, not unconditionally the local disk.
	            FileSystem fs = outputPath.getFileSystem(conf);
	            fs.delete(outputPath, true);
	            job.setJarByClass(WordCounter.class);
	            job.setOutputKeyClass(Text.class);
	            job.setOutputValueClass(IntWritable.class);
	            job.setMapperClass(WordMapper.class);
	            job.setReducerClass(WordReducer.class);
	            // waitForCompletion returns false when the job did not succeed; surface that
	            // instead of returning as if nothing had gone wrong.
	            if (!job.waitForCompletion(false)) {
	                System.err.println("Job '" + jobName + "' failed");
	            }
	        } catch (InterruptedException e) {
	            // Restore the interrupt flag so code further up the stack can still observe it.
	            Thread.currentThread().interrupt();
	            System.err.println("Exception thrown");
	            e.printStackTrace();
	        } catch (Exception e) {
	            System.err.println("Exception thrown");
	            e.printStackTrace();
	        }
	    }
	}
	package manu.sandbox.demos.hadoop;

	/**
	 * Command-line launcher for the word-count job.
	 */
	public class WordCounterTest {

	    /**
	     * Forwards exactly three command-line arguments to {@code WordCounter.run};
	     * with any other argument count an error line is printed to {@code System.err}.
	     *
	     * @param args input path, output path and job name, in that order
	     */
	    public static void main(String[] args) {
	        final int expectedArgCount = 3;
	        if (args.length != expectedArgCount) {
	            System.err.println("Invalid number of arguments");
	            return;
	        }

	        final String input = args[0];
	        final String output = args[1];
	        final String jobName = args[2];
	        WordCounter.run(input, output, jobName);
	    }
	}