Skip to content

Instantly share code, notes, and snippets.

@bretthoerner
Created January 25, 2012 22:51
Show Gist options
  • Save bretthoerner/1679428 to your computer and use it in GitHub Desktop.
Save bretthoerner/1679428 to your computer and use it in GitHub Desktop.
import java.io.IOException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.File;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.SnappyCodec;
/**
 * Command-line tool that converts a line-oriented text file into a
 * block-compressed, Snappy-encoded Hadoop {@link SequenceFile}.
 *
 * <p>Each line of the input becomes one record whose key is the empty
 * {@link Text} and whose value is the line itself. The output is written to
 * {@code <input>.snappy.seq}; the tool refuses to run if a local file with
 * that name already exists.
 */
public class MakeSequenceFile {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the path of the text file to convert
     * @throws IOException if the input cannot be read or the sequence file
     *                     cannot be created, written, or closed
     */
    public static void main(String[] args) throws IOException {
        if (args.length < 1) {
            System.err.println("Usage: MakeSequenceFile <input-file>");
            System.exit(1);
        }

        String out = args[0] + ".snappy.seq";
        // NOTE(review): this existence check uses java.io.File (local disk),
        // while the writer below goes through FileSystem.get(...); the two
        // may refer to different filesystems depending on the configuration.
        if (new File(out).exists()) {
            System.err.println(String.format("Error: file '%s' already exists.", out));
            System.exit(1);
        }

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(out), conf);
        Path path = new Path(out);

        // Every record shares the same empty key; only the value changes.
        Text key = new Text();
        key.set("");
        Text value = new Text();

        // SnappyCodec is Configurable; hand it the job configuration rather
        // than leaving its conf unset.
        SnappyCodec codec = new SnappyCodec();
        codec.setConf(conf);

        BufferedReader in = null;
        SequenceFile.Writer writer = null;
        try {
            // Open the input before creating the output so that a missing
            // input file does not leave an empty sequence file behind.
            in = new BufferedReader(new FileReader(args[0]));
            writer = SequenceFile.createWriter(fs, conf, path,
                                               key.getClass(), value.getClass(),
                                               SequenceFile.CompressionType.BLOCK,
                                               codec);
            String line;
            while ((line = in.readLine()) != null) {
                value.set(line);
                writer.append(key, value);
            }

            // Close explicitly on the success path: IOUtils.closeStream
            // swallows IOExceptions, which would hide a failed final flush
            // and leave a truncated file without any error being reported.
            writer.close();
            writer = null;
        } finally {
            // Best-effort cleanup; closeStream ignores null arguments.
            IOUtils.closeStream(writer);
            IOUtils.closeStream(in);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment