Skip to content

Instantly share code, notes, and snippets.

@sdpatil
Created July 23, 2014 14:14
Workflow.xml for a Java MapReduce job that uses the distributed cache
<!--
  Oozie workflow "apachelog-wf".

  Flow: start -> mr-node (a single map-reduce action) -> end on success,
  or -> fail (kill node) on error.

  The EL variables jobTracker, nameNode, queueName, logFile and outputDir are
  not defined in this file; they are expected to be supplied when the workflow
  is submitted (typically via job.properties - confirm against the deployment).
-->
<workflow-app xmlns="uri:oozie:workflow:0.2" name="apachelog-wf">

  <start to="mr-node"/>

  <action name="mr-node">
    <map-reduce>
      <job-tracker>${jobTracker}</job-tracker>
      <name-node>${nameNode}</name-node>

      <!-- Remove the output directory of a previous run; this is the same
           path as mapred.output.dir below, qualified with the name node. -->
      <prepare>
        <delete path="${nameNode}/user/${wf:user()}/output/${outputDir}"/>
      </prepare>

      <configuration>
        <!-- Switch both the mapper and the reducer to the new MapReduce API. -->
        <property>
          <name>mapred.mapper.new-api</name>
          <value>true</value>
        </property>
        <property>
          <name>mapred.reducer.new-api</name>
          <value>true</value>
        </property>

        <!-- Scheduler queue the job is submitted to. -->
        <property>
          <name>mapred.job.queue.name</name>
          <value>${queueName}</value>
        </property>

        <!-- Mapper and reducer implementation classes. -->
        <property>
          <name>mapreduce.map.class</name>
          <value>com.spnotes.hadoop.logs.ApacheLogMapper</value>
        </property>
        <property>
          <name>mapreduce.reduce.class</name>
          <value>com.spnotes.hadoop.logs.ApacheLogReducer</value>
        </property>

        <!-- Types of the job's output key/value pairs. -->
        <property>
          <name>mapred.output.key.class</name>
          <value>org.apache.hadoop.io.Text</value>
        </property>
        <property>
          <name>mapred.output.value.class</name>
          <value>org.apache.hadoop.io.IntWritable</value>
        </property>

        <!-- Requested number of map tasks. -->
        <property>
          <name>mapred.map.tasks</name>
          <value>1</value>
        </property>

        <!-- Input and output locations, both under the submitting user's
             /user directory. -->
        <property>
          <name>mapred.input.dir</name>
          <value>/user/${wf:user()}/${logFile}</value>
        </property>
        <property>
          <name>mapred.output.dir</name>
          <value>/user/${wf:user()}/output/${outputDir}</value>
        </property>
      </configuration>

      <!-- Ship GeoLite.mmdb to the tasks; the part after '#' is the symlink
           name (GeoLite2-City.mmdb) under which the file is exposed in the
           task working directory. -->
      <file>GeoLite.mmdb#GeoLite2-City.mmdb</file>
    </map-reduce>

    <ok to="end"/>
    <error to="fail"/>
  </action>

  <!-- Terminal failure node: reports the error message of the last failed node. -->
  <kill name="fail">
    <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  </kill>

  <end name="end"/>

</workflow-app>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment