Jem G. (jgulum)

  • nutrasynch.ai
  • NYC
@jgulum
jgulum / EmployeeReducer.java
Created November 18, 2013 17:14
The Reducer phase in Java. Apache Ant is used to compile, package, and run the code (see the build.xml later in this listing).
package example;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/*
 * To define a reduce function for your MapReduce job, subclass
 * Reducer and override its reduce method.
 */
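// What follows is a hedged sketch of the class body the gist preview
// truncates; the original is not shown, so the logic assumes the
// mapper's (state, 1) output and sums the counts per state.
public class EmployeeReducer
        extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Total the 1s emitted by the mapper for this state.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}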
@jgulum
jgulum / EmployeeMapper.java
Created November 18, 2013 17:09
The Mapper phase in Java: a training example that reads employee records exported from a typical relational table.
package example;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/*
 * To define a map function for your MapReduce job, subclass
 * Mapper and override its map method.
 */
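// Hedged sketch of the truncated class body; the record layout follows
// the Python mapper later in this listing (tab-separated employee rows
// with state in column 6 and salary in column 11), which is an assumption.
public class EmployeeMapper
        extends Mapper<LongWritable, Text, Text, IntWritable> {

    private static final int STATE = 5;    // zero-based column positions,
    private static final int SALARY = 10;  // assumed from the Python mapper

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = value.toString().split("\t");
        // Emit (state, 1) for every employee earning $75,000 or more.
        if (Integer.parseInt(fields[SALARY]) >= 75000) {
            context.write(new Text(fields[STATE]), new IntWritable(1));
        }
    }
}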
@jgulum
jgulum / Driver.java
Created November 18, 2013 17:00
Driver for a MapReduce job in Java. The driver class configures the job and submits it to the Hadoop cluster for execution.
package example;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;
/*
 * The driver configures the job (mapper, reducer, input and
 * output paths) and submits it to the cluster.
 */
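// Hedged sketch of the truncated class body; the job name and the use
// of command-line arguments for the input and output paths are assumptions.
public class Driver {

    public static void main(String[] args) throws Exception {
        Job job = new Job();
        job.setJarByClass(Driver.class);
        job.setJobName("Employee Count");

        // Input and output locations, e.g. as passed by the Ant "run" target.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(EmployeeMapper.class);
        job.setReducerClass(EmployeeReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Submit the job and wait; exit nonzero on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}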
@jgulum
jgulum / build.xml
Created November 18, 2013 16:58
The Ant build file used to compile, package, and run the MapReduce job in Java.
<?xml version="1.0" encoding="UTF-8"?>
<project name="empdata_mrjob" default="run" basedir=".">
  <target name="init">
    <property name="src.dir" value="java/src" />
    <property name="build.dir" value="java/build" />
    <property name="dist.dir" value="java/dist" />
    <property name="hdfs.output.dir" value="/user/training/empcounts_java" />
@jgulum
jgulum / running the mapReduce job on Linux terminal.
Created November 18, 2013 16:38
Running a Python MapReduce job: the following shell script, executed in a Linux environment, defines the path of the Java library (JAR) that provides Hadoop streaming support, defines the output directory, and makes sure that directory does not already exist (for example, from a previous run) before submitting the streaming job.
#!/bin/sh
# Path of Hadoop streaming JAR library
STREAMJAR=/usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming-*.jar
# Directory in which we'll store job output
OUTPUT=/user/training/empcounts
# Make sure we don't have output from a previous run.
# The -r option removes the directory recursively, and
# stderr is silenced so a missing directory is not reported.
hadoop fs -rm -r $OUTPUT 2>/dev/null  # flags assumed; older CLIs use "hadoop fs -rmr"
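# Hedged sketch of the submission the preview truncates; the input path
# "empdata" and the mapper/reducer file names are assumptions.
hadoop jar $STREAMJAR \
    -input empdata \
    -output $OUTPUT \
    -mapper mapper.py \
    -reducer reducer.py \
    -file mapper.py \
    -file reducer.py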
@jgulum
jgulum / Reducer
Created November 18, 2013 16:32
Running a Python MapReduce Job: The reducer phase of the job.
#!/usr/bin/env python
import sys
previous_state = ''
count_for_state = 0
for line in sys.stdin:
    line = line.strip()
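    # Hedged sketch of the truncated remainder: the mapper emits
    # "state,1" lines and Hadoop sorts them by key, so we can total
    # a run of lines for one state and print when the state changes.
    (state, count) = line.split(",")
    if previous_state and state != previous_state:
        print "%s\t%d" % (previous_state, count_for_state)
        count_for_state = 0
    previous_state = state
    count_for_state += int(count)

# Emit the total for the final state.
if previous_state:
    print "%s\t%d" % (previous_state, count_for_state)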
@jgulum
jgulum / Mapper
Created November 18, 2013 16:27
Running a Python MapReduce Job: The mapper phase of the job.
#!/usr/bin/env python
import sys
for line in sys.stdin:
    line = line.strip()
    # Each input record is a tab-separated employee row.
    (id, fname, lname, addr, city, state, zip, job, email, active, salary) = line.split("\t")
    # Emit "state,1" for every employee earning $75,000 or more.
    if int(salary) >= 75000:
        print "%s,1" % state