Skip to content

Instantly share code, notes, and snippets.

@noiano
Created February 2, 2011 19:54
Show Gist options
  • Save noiano/808297 to your computer and use it in GitHub Desktop.
Save noiano/808297 to your computer and use it in GitHub Desktop.
public class JobDriver {
/**
 * Configures and runs the "invindex" MapReduce job, blocking until it finishes.
 *
 * <p>The input file and the {@code output} directory are hard-coded. Any existing
 * {@code output} directory is deleted recursively before the job is submitted.
 * When the job reports failure the JVM is terminated with exit status 1 so that
 * calling scripts can detect it; on success the method simply returns.
 *
 * @param args command-line arguments; currently unused
 * @throws IOException if a filesystem or job-submission call fails with an I/O error
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 * @throws ClassNotFoundException if a class required by the job cannot be loaded
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // Record delimiters; presumably consumed by XMLInputFormat (confirm against that class).
    conf.set("xmlinput.start", "<document");
    conf.set("xmlinput.end", "document>");

    Job job = new Job(conf, "invindex");
    //job.setJarByClass(JobDriver.class);
    job.setNumReduceTasks(2);

    job.setMapperClass(com.github.noiano.hXMLInvIndex.InvIndMapperDOM.class);
    job.setReducerClass(com.github.noiano.hXMLInvIndex.InvIndexReducer.class);
    job.setPartitionerClass(TermPartitioner.class);
    //job.setGroupingComparatorClass(GroupingComparator.class);

    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(PairOfWritables.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Posting.class);

    job.setInputFormatClass(XMLInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path("/home/noiano/PycharmProjects/Text2Xml/small.xml"));

    // Single definition of the output location, used for both cleanup and job output.
    Path outputDir = new Path("output");
    // Remove results of a previous run so the job can write to the same directory again.
    FileSystem.get(conf).delete(outputDir, true);
    FileOutputFormat.setOutputPath(job, outputDir);

    // Propagate job failure to the caller instead of silently exiting with status 0.
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        System.exit(1);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment