Skip to content

Instantly share code, notes, and snippets.

View rclayton-the-terrible's full-sized avatar

Richard Clayton rclayton-the-terrible

View GitHub Profile
<workflow-app name='example' xmlns="uri:oozie:workflow:0.1">
<start to='firstjob' />
<action name="firstjob">
<map-reduce>
<job-tracker>${jobtracker}</job-tracker>
<name-node>${namenode}</name-node>
<configuration>
<property><name>mapred.mapper.class</name><value>org.apache.hadoop.example.IdMapper</value></property>
<property><name>mapred.reducer.class</name><value>org.apache.hadoop.example.IdReducer</value></property>
<workflow-app name='example' xmlns="uri:oozie:workflow:0.1">
<start to='firstjob' />
<action name="firstjob">
<map-reduce>
<job-tracker>${jobtracker}</job-tracker>
<name-node>${namenode}</name-node>
<configuration>
<property><name>mapred.mapper.class</name><value>org.apache.hadoop.example.IdMapper</value></property>
<property><name>mapred.reducer.class</name><value>org.apache.hadoop.example.IdReducer</value></property>
// Input side: a TextLine scheme declaring a single field named "line",
// bound to an Hfs tap over inputPath (inputPath is defined outside this snippet).
Scheme sourceScheme = new TextLine( new Fields( "line" ) );
Tap source = new Hfs( sourceScheme, inputPath );
// Output side: a TextLine scheme declaring the fields "word" and "count",
// bound to an Hfs tap over outputPath (also defined outside this snippet).
// NOTE(review): SinkMode.REPLACE presumably overwrites existing output — confirm against the Cascading docs.
Scheme sinkScheme = new TextLine( new Fields( "word", "count" ) );
Tap sink = new Hfs( sinkScheme, outputPath, SinkMode.REPLACE );
// Head of the pipe assembly, named "wordcount".
Pipe assembly = new Pipe( "wordcount" );
// Matches a run of non-space characters that starts at a letter not preceded by
// another letter (\pL) and ends at a letter not followed by another letter.
String regex = "(?<!\\pL)(?=\\pL)[^ ]*(?<=\\pL)(?!\\pL)";
// Function declaring a single output field "word", driven by the regex above.
// NOTE(review): presumably emits one tuple per regex match — confirm against RegexGenerator's documentation.
Function function = new RegexGenerator( new Fields( "word" ), regex );
/**
 * Contract for a map step, generic over its input and output key/value types.
 *
 * @param <InKey> type of the key passed to {@code map}
 * @param <InValue> type of the value passed to {@code map}
 * @param <OutKey> key type accepted by the {@code OutputCollector}
 * @param <OutValue> value type accepted by the {@code OutputCollector}
 */
public interface Mapper<InKey, InValue, OutKey, OutValue> {
/**
 * Handles one input key/value pair.
 *
 * @param key the input key
 * @param value the input value
 * @param output collector typed for the output key/value pair
 *     (presumably where results are emitted; confirm against the collector's API)
 * @param reporter reporter handed to the call; its role is not visible in this snippet
 * @throws IOException declared by the contract; the triggering conditions are up to the implementation
 */
void map(InKey key, InValue value, OutputCollector<OutKey, OutValue> output, Reporter reporter) throws IOException;
}
public interface EventFilter<TEvent> {
/**
 * Result type of {@code EventFilter.filter}: one of {@code Handle},
 * {@code Reject} or {@code Retry}.
 *
 * <p>NOTE(review): what each constant means to the caller is inferred from
 * its name only; confirm against the code that consumes the decision.
 * The declaration order is kept as-is so ordinals do not change.
 */
public enum Decision { Handle, Reject, Retry }
Decision filter(TEvent event);
@rclayton-the-terrible
rclayton-the-terrible / gist:1909211
Created February 25, 2012 15:55
EnvelopeFilter
public interface EnvelopeFilter {
/**
 * Decisions declared by {@code EnvelopeFilter}: {@code Handle},
 * {@code Reject} or {@code Retry}.
 *
 * <p>NOTE(review): the member that produces a decision is not visible in
 * this snippet, and the meaning of each constant is inferred from its name
 * only; confirm against the consuming code. The declaration order is kept
 * as-is so ordinals do not change.
 */
public enum Decision { Handle, Reject, Retry }
public class MyWritable implements Writable {
// Some data
private int counter;
private long timestamp;
/**
 * Serializes this object's fields to the given output.
 *
 * <p>Writes {@code counter} as an int, then {@code timestamp} as a long.
 * Because the output is sequential, a reader must consume the two values
 * in this same order.
 *
 * @param out destination the fields are written to
 * @throws IOException if writing to {@code out} fails
 */
@Override // implements Writable.write; lets the compiler verify the signature
public void write(DataOutput out) throws IOException {
    out.writeInt(counter);
    out.writeLong(timestamp);
}
@rclayton-the-terrible
rclayton-the-terrible / gist:2474013
Created April 23, 2012 21:33
Hadoop WritableComparable
public class MyWritableComparable implements WritableComparable {
// Some data
private int counter;
private long timestamp;
/**
 * Serializes this object's fields to the given output.
 *
 * <p>Writes {@code counter} as an int, then {@code timestamp} as a long.
 * Because the output is sequential, a reader must consume the two values
 * in this same order.
 *
 * @param out destination the fields are written to
 * @throws IOException if writing to {@code out} fails
 */
@Override // write is inherited from Writable via WritableComparable; lets the compiler verify the signature
public void write(DataOutput out) throws IOException {
    out.writeInt(counter);
    out.writeLong(timestamp);
}
@rclayton-the-terrible
rclayton-the-terrible / gist:2474028
Created April 23, 2012 21:38
Writable Interface
package org.apache.hadoop.io;
import java.io.DataOutput;
import java.io.DataInput;
import java.io.IOException;
public interface Writable {
void write(DataOutput out) throws IOException;
@rclayton-the-terrible
rclayton-the-terrible / gist:2474037
Created April 23, 2012 21:40
WritableComparable Interface
package org.apache.hadoop.io;
/**
 * Combines {@code Writable} and {@link Comparable} into a single type.
 *
 * <p>Declares no members of its own; implementers supply the methods of
 * both parent interfaces.
 *
 * @param <T> the type this object can be compared with
 */
public interface WritableComparable<T> extends Writable, Comparable<T> {}