hadoop-venu-cloudwick
// FileSystemCat.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.InputStream;
import java.net.URI;

/**
 * Streams the contents of a file stored on HDFS to standard output,
 * using the Hadoop FileSystem API directly.
 */
public class FileSystemCat {

    public static void main(String[] args) throws Exception {
        fileSystemCat("hdfs://192.168.1.188/data/yelp_academic_dataset_review_clean.json");
    }

    static void fileSystemCat(String uri) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        InputStream in = null;
        try {
            in = fs.open(new Path(uri));
            // Copy in 4 KB chunks; false = leave System.out open afterwards.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
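FileSystem.open() actually returns an FSDataInputStream, which is seekable, so the same stream can be rewound and the file printed twice. A minimal sketch of that variant (not part of the original gist), taking the HDFS URI as a command-line argument:

// FileSystemDoubleCat.java (sketch)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.net.URI;

public class FileSystemDoubleCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
            in.seek(0); // rewind to the start and print the file again
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}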
// MaxTemperature.java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperature {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "Max temperature");
        job.setJarByClass(MaxTemperature.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.188/data/yelp_academic_dataset_review_clean.json"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.188/output/out.txt"));

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);

        // The mapper and reducer both emit (IntWritable year, IntWritable temperature),
        // so the output key class must be IntWritable, not Text.
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

/**
 * Parses fixed-width NCDC weather records: the year sits at columns 15-19,
 * the air temperature at columns 87-92, and the quality code at column 92.
 */
class MaxTemperatureMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

    private static final int MISSING = 9999;

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String year = line.substring(15, 19);
        int airTemperature;
        if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
            airTemperature = Integer.parseInt(line.substring(88, 92));
        } else {
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new IntWritable(Integer.parseInt(year)), new IntWritable(airTemperature));
        }
    }
}

class MaxTemperatureReducer
        extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

    @Override
    public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            maxValue = Math.max(maxValue, value.get());
        }
        // Emit the year together with its maximum temperature.
        context.write(key, new IntWritable(maxValue));
    }
}
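The substring and parseInt calls in MaxTemperatureMapper assume every input line is a well-formed, fixed-width NCDC record; a short or non-numeric line (for instance, the JSON input currently wired into the job) will throw and fail the task. A minimal defensive variant of the map body, offered as a sketch rather than part of the original gist, that silently skips malformed records:

@Override
public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String line = value.toString();
    if (line.length() < 93) {
        return; // too short to hold the year, temperature, and quality fields
    }
    try {
        int year = Integer.parseInt(line.substring(15, 19));
        int airTemperature = (line.charAt(87) == '+')
                ? Integer.parseInt(line.substring(88, 92))
                : Integer.parseInt(line.substring(87, 92));
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new IntWritable(year), new IntWritable(airTemperature));
        }
    } catch (NumberFormatException e) {
        // Non-numeric year or temperature field: skip the record.
    }
}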
// TestPut.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

/**
 * Copies a local file to HDFS, printing a dot each time Hadoop
 * reports write progress.
 */
public class TestPut {

    static void copyToHDFS(String localSrc, String destination) throws Exception {
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(destination), conf);
        OutputStream out = fs.create(new Path(destination), new Progressable() {
            @Override
            public void progress() {
                System.out.print(".");
            }
        });
        // Copy in 4 KB chunks; true = close both streams when done.
        IOUtils.copyBytes(in, out, 4096, true);
    }

    public static void main(String[] args) throws Exception {
        String localSrc = "haha.txt";
        String dst = "hdfs://192.168.1.188/TestPut/haha.txt";
        copyToHDFS(localSrc, dst);
    }
}
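For simple uploads where progress reporting isn't needed, the FileSystem API's built-in copyFromLocalFile does the same job in one call. A minimal sketch using the same paths as above:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.1.188/TestPut/haha.txt"), conf);
// Copies the local file to HDFS in one call; no Progressable hook.
fs.copyFromLocalFile(new Path("haha.txt"), new Path("hdfs://192.168.1.188/TestPut/haha.txt"));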
// WordCount.java
import java.io.IOException;
import java.util.Date;
import java.util.Formatter;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // Strip generic Hadoop options (-D, -fs, ...) before reading our own args.
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        args = parser.getRemainingArgs();

        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Timestamp the output directory so reruns don't collide.
        Formatter formatter = new Formatter();
        String outpath = "/output/"
                + formatter.format("%1$tm%1$td%1$tH%1$tM%1$tS", new Date());

        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.1.188/data/yelp_academic_dataset_review_clean.json"));
        FileOutputFormat.setOutputPath(job, new Path(outpath));

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        System.out.println(job.waitForCompletion(true));
    }
}

class WordCountReducer extends
        Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        // Sum the 1s emitted by the mapper for this word.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}

class WordCountMapper extends
        Mapper<LongWritable, Text, Text, IntWritable> {

    private Text word = new Text();
    private final static IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (token, 1) for every whitespace-separated token in the line.
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, one);
        }
    }
}
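Since word counting just sums, partial sums computed map-side give the same final result, so the existing reducer can double as a combiner to shrink the shuffle. An optional one-line addition to the job setup in main:

// Pre-aggregate (word, 1) pairs on each mapper before the shuffle;
// safe here because addition is associative and commutative.
job.setCombinerClass(WordCountReducer.class);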
// WordCountTest.java
/**
 * Placeholder for WordCount tests; no test cases have been written yet.
 */
public class WordCountTest {
}
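One way to fill in this placeholder would be MRUnit, the MapReduce unit-testing library of that era. A hedged sketch (assuming the mrunit and junit jars are on the classpath, and that the test lives in the same package as WordCountMapper) that feeds one line through the mapper and checks the emitted pairs:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class WordCountMapperTest {

    @Test
    public void emitsOneForEachToken() throws Exception {
        MapDriver<LongWritable, Text, Text, IntWritable> driver =
                MapDriver.newMapDriver(new WordCountMapper());
        // Expected outputs must appear in the order the mapper emits them.
        driver.withInput(new LongWritable(0), new Text("hello hello world"))
              .withOutput(new Text("hello"), new IntWritable(1))
              .withOutput(new Text("hello"), new IntWritable(1))
              .withOutput(new Text("world"), new IntWritable(1))
              .runTest();
    }
}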