Skip to content

Instantly share code, notes, and snippets.

@rajarshi
Created May 4, 2009 16:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rajarshi/106527 to your computer and use it in GitHub Desktop.
Save rajarshi/106527 to your computer and use it in GitHub Desktop.
package net.rguha.dc;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
/**
 * Input format for SDF files, whose records span multiple lines.
 *
 * <p>Extends {@link TextInputFormat} and overrides record-reader creation so that
 * an {@link SDFRecordReader} is handed back instead of the inherited reader.
 *
 * <p>Based on code found
 * <a href="http://www.nabble.com/Re%3A-map-reduce-function-on-xml-string-p15835195.html">here</a>,
 * which handles records in an XML file.
 */
public class SDFInputFormat extends TextInputFormat {

    /**
     * Supplies the reader used for this input format.
     *
     * @param inputSplit the split to be read; not consulted by this method
     * @param taskAttemptContext the task attempt context; not consulted by this method
     * @return a newly constructed {@link SDFRecordReader}
     */
    @Override
    public RecordReader<LongWritable, Text> createRecordReader(
            final InputSplit inputSplit,
            final TaskAttemptContext taskAttemptContext) {
        // A fresh reader is built on every call; neither argument is used here.
        final RecordReader<LongWritable, Text> sdfReader = new SDFRecordReader();
        return sdfReader;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment