hadoop-venu-cloudwick
// FileSystemCat.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.InputStream;
import java.net.URI;

/**
 * Streams the contents of a file stored on HDFS to standard output,
 * using the Hadoop FileSystem API directly.
 */
public class FileSystemCat {

    public static void main(String[] args) throws Exception {
        fileSystemCat("hdfs://192.168.1.188/data/yelp_academic_dataset_review_clean.json");
    }

    static void fileSystemCat(String uri) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        InputStream in = null;
        try {
            in = fs.open(new Path(uri));
            // Copy in 4 KB chunks; false = leave System.out open afterwards.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
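FileSystem.open() actually returns an FSDataInputStream, which is seekable, so the same stream can be rewound and the file printed twice. A minimal sketch of that variant (not part of the original gist), taking the HDFS URI as a command-line argument:

// FileSystemDoubleCat.java (sketch)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.net.URI;

public class FileSystemDoubleCat {
    public static void main(String[] args) throws Exception {
        String uri = args[0];
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
            in.seek(0); // rewind to the start and print the file again
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}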
// MaxTemperature.java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxTemperature {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "Max temperature");
        job.setJarByClass(MaxTemperature.class);

        FileInputFormat.addInputPath(job, new Path("hdfs://192.168.1.188/data/yelp_academic_dataset_review_clean.json"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.188/output/out.txt"));

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);

        // The mapper and reducer both emit (IntWritable year, IntWritable temperature),
        // so the output key class must be IntWritable, not Text.
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

/**
 * Parses fixed-width NCDC weather records: the year sits at columns 15-19,
 * the air temperature at columns 87-92, and the quality code at column 92.
 */
class MaxTemperatureMapper extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

    private static final int MISSING = 9999;

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString();
        String year = line.substring(15, 19);
        int airTemperature;
        if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
            airTemperature = Integer.parseInt(line.substring(88, 92));
        } else {
            airTemperature = Integer.parseInt(line.substring(87, 92));
        }
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new IntWritable(Integer.parseInt(year)), new IntWritable(airTemperature));
        }
    }
}

class MaxTemperatureReducer
        extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

    @Override
    public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int maxValue = Integer.MIN_VALUE;
        for (IntWritable value : values) {
            maxValue = Math.max(maxValue, value.get());
        }
        // Emit the year together with its maximum temperature.
        context.write(key, new IntWritable(maxValue));
    }
}
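The substring and parseInt calls in MaxTemperatureMapper assume every input line is a well-formed, fixed-width NCDC record; a short or non-numeric line (for instance, the JSON input currently wired into the job) will throw and fail the task. A minimal defensive variant of the map body, offered as a sketch rather than part of the original gist, that silently skips malformed records:

@Override
public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
    String line = value.toString();
    if (line.length() < 93) {
        return; // too short to hold the year, temperature, and quality fields
    }
    try {
        int year = Integer.parseInt(line.substring(15, 19));
        int airTemperature = (line.charAt(87) == '+')
                ? Integer.parseInt(line.substring(88, 92))
                : Integer.parseInt(line.substring(87, 92));
        String quality = line.substring(92, 93);
        if (airTemperature != MISSING && quality.matches("[01459]")) {
            context.write(new IntWritable(year), new IntWritable(airTemperature));
        }
    } catch (NumberFormatException e) {
        // Non-numeric year or temperature field: skip the record.
    }
}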
// TestPut.java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;

/**
 * Copies a local file to HDFS, printing a dot each time Hadoop
 * reports write progress.
 */
public class TestPut {

    static void copyToHDFS(String localSrc, String destination) throws Exception {
        InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(destination), conf);
        OutputStream out = fs.create(new Path(destination), new Progressable() {
            @Override
            public void progress() {
                System.out.print(".");
            }
        });
        // Copy in 4 KB chunks; true = close both streams when done.
        IOUtils.copyBytes(in, out, 4096, true);
    }

    public static void main(String[] args) throws Exception {
        String localSrc = "haha.txt";
        String dst = "hdfs://192.168.1.188/TestPut/haha.txt";
        copyToHDFS(localSrc, dst);
    }
}
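For simple uploads where progress reporting isn't needed, the FileSystem API's built-in copyFromLocalFile does the same job in one call. A minimal sketch using the same paths as above:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.1.188/TestPut/haha.txt"), conf);
// Copies the local file to HDFS in one call; no Progressable hook.
fs.copyFromLocalFile(new Path("haha.txt"), new Path("hdfs://192.168.1.188/TestPut/haha.txt"));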
// WordCount.java
import java.io.IOException;
import java.util.Date;
import java.util.Formatter;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // Strip generic Hadoop options (-D, -fs, ...) before reading our own args.
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        args = parser.getRemainingArgs();

        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Timestamp the output directory so reruns don't collide.
        Formatter formatter = new Formatter();
        String outpath = "/output/"
                + formatter.format("%1$tm%1$td%1$tH%1$tM%1$tS", new Date());

        FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.1.188/data/yelp_academic_dataset_review_clean.json"));
        FileOutputFormat.setOutputPath(job, new Path(outpath));

        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);

        System.out.println(job.waitForCompletion(true));
    }
}

class WordCountReducer extends
        Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Context context) throws IOException, InterruptedException {
        // Sum the 1s emitted by the mapper for this word.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        context.write(key, new IntWritable(sum));
    }
}

class WordCountMapper extends
        Mapper<LongWritable, Text, Text, IntWritable> {

    private Text word = new Text();
    private final static IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Emit (token, 1) for every whitespace-separated token in the line.
        String line = value.toString();
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            context.write(word, one);
        }
    }
}
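Since word counting just sums, partial sums computed map-side give the same final result, so the existing reducer can double as a combiner to shrink the shuffle. An optional one-line addition to the job setup in main:

// Pre-aggregate (word, 1) pairs on each mapper before the shuffle;
// safe here because addition is associative and commutative.
job.setCombinerClass(WordCountReducer.class);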
// WordCountTest.java
/**
 * Placeholder for WordCount tests; no test cases have been written yet.
 */
public class WordCountTest {
}
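One way to fill in this placeholder would be MRUnit, the MapReduce unit-testing library of that era. A hedged sketch (assuming the mrunit and junit jars are on the classpath, and that the test lives in the same package as WordCountMapper) that feeds one line through the mapper and checks the emitted pairs:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class WordCountMapperTest {

    @Test
    public void emitsOneForEachToken() throws Exception {
        MapDriver<LongWritable, Text, Text, IntWritable> driver =
                MapDriver.newMapDriver(new WordCountMapper());
        // Expected outputs must appear in the order the mapper emits them.
        driver.withInput(new LongWritable(0), new Text("hello hello world"))
              .withOutput(new Text("hello"), new IntWritable(1))
              .withOutput(new Text("hello"), new IntWritable(1))
              .withOutput(new Text("world"), new IntWritable(1))
              .runTest();
    }
}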