Skip to content

Instantly share code, notes, and snippets.

@ottomata
Created August 14, 2013 16:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ottomata/6232890 to your computer and use it in GitHub Desktop.
Save ottomata/6232890 to your computer and use it in GitHub Desktop.
package org.wikimedia.analytics.kraken.etl;
import com.linkedin.camus.coders.CamusWrapper;
import com.linkedin.camus.etl.IEtlKey;
import com.linkedin.camus.etl.RecordWriterProvider;
import com.linkedin.camus.etl.kafka.mapred.EtlMultiOutputFormat;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
 * {@link RecordWriterProvider} that writes the payload of each Camus record as a
 * line of UTF-8 text.
 *
 * <p>Each call to {@link #getDataRecordWriter} opens its own output file inside the
 * committer's work directory, named from the requested {@code fileName} plus
 * {@link #EXT}, and returns a writer bound to that file.
 */
public class TextRecordWriterProvider implements RecordWriterProvider {
    /** Extension appended to every output file created by this provider. */
    public static final String EXT = ".txt";

    /** Charset name used to encode records; passed by name to stay Java 6 compatible. */
    private static final String CHARSET_NAME = "UTF-8";

    /**
     * Separator written after every record so the output stays line-oriented.
     * NOTE(review): assumes records do not already end with a newline - confirm
     * against the message decoder that produces them.
     */
    private static final String RECORD_DELIMITER = "\n";

    /**
     * Returns the extension used for files produced by this provider.
     *
     * @return {@link #EXT}
     */
    @Override
    public String getFilenameExtension() {
        return EXT;
    }

    /**
     * Creates a writer that appends record payloads to a dedicated file in the
     * committer's work directory.
     *
     * @param context   task attempt context; supplies the configuration used to
     *                  resolve the filesystem and is passed to the unique-name helper
     * @param fileName  base name of the file to create
     * @param data      not used when creating the writer
     * @param committer output committer whose work path is the parent directory of
     *                  the created file
     * @return a writer that writes {@code String.valueOf(record)} followed by a
     *         newline, encoded as UTF-8, for every record it is given
     * @throws IOException          if the work path cannot be resolved or the file
     *                              cannot be created
     * @throws InterruptedException declared by the implemented interface; not thrown
     *                              by this implementation itself
     */
    @Override
    public RecordWriter<IEtlKey, CamusWrapper> getDataRecordWriter(
            TaskAttemptContext context,
            String fileName,
            CamusWrapper data,
            FileOutputCommitter committer) throws IOException, InterruptedException {
        // Resolve the target inside the committer's work directory instead of letting
        // TextOutputFormat fall back to the task's single default work file.
        Path target = new Path(
                committer.getWorkPath(),
                EtlMultiOutputFormat.getUniqueFile(context, fileName, EXT));

        final OutputStream out =
                target.getFileSystem(context.getConfiguration()).create(target);

        return new RecordWriter<IEtlKey, CamusWrapper>() {
            /**
             * Writes the wrapped record only; the key is ignored so that its string
             * form does not end up in the output file.
             */
            @Override
            public void write(IEtlKey ignored, CamusWrapper wrapper) throws IOException {
                // String.valueOf renders a null record as "null" rather than throwing.
                String line = String.valueOf(wrapper.getRecord()) + RECORD_DELIMITER;
                out.write(line.getBytes(CHARSET_NAME));
            }

            /** Closes the underlying output stream. */
            @Override
            public void close(TaskAttemptContext closingContext) throws IOException {
                out.close();
            }
        };
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment