secret
Last active

  • Download Gist
LzoReadTest.java
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
package com.twitter.twadoop.jobs;
 
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
 
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.mapreduce.Job;
 
import com.hadoop.compression.lzo.LzopDecompressor;
import com.hadoop.compression.lzo.LzopInputStream;
 
public class LzoReadTest {
 
public static void main(String[] args) throws IOException {
 
 
System.out.println(".. but can I read the file myself? ");
int bs = 256*1024;
 
Path file = new Path(args[0]);
 
Configuration conf = new Configuration();
FileSystem fs = file.getFileSystem(conf);
CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(conf);
final CompressionCodec codec = compressionCodecs.getCodec(file);
if (codec == null) {
throw new IOException("Codec for file " + file + " not found, cannot run");
}
 
// Read from local
InputStream is2 = codec.createInputStream(new FileInputStream("/tmp/statuses_recompressed.lzo"));
System.out.println("My codec class is " + codec.getClass() + " and my inputstream class is " + is2.getClass());
 
byte[] foo = new byte[bs];
while (is2.read(foo) != -1) {
System.out.print(".");
}
System.out.println("\n-- Local Ok. Let's try HDFS. --");
 
// Read from HDFS
FSDataInputStream fileIn = fs.open(file);
InputStream is = codec.createInputStream(fileIn);
 
System.out.println("My codec class is " + codec.getClass() + " and my inputstream class is " + is.getClass());
while (is.read(foo) != -1 ) {
System.out.print(".");
}
System.out.print("\n");
System.out.println("-- All GOOD! --");
}
}
log
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
[dmitriy@sjc1j039 ~]$ hadoop jar twadoop/twadoop-1.0.jar com.twitter.twadoop.jobs.LzoReadTest /user/dmitriy/statuses_recompressed.lzo
.. but can I read the file myself?
10/04/01 06:39:23 INFO lzo.GPLNativeCodeLoader: Loaded native gpl library. OH BTW HI DMITRIY
10/04/01 06:39:23 INFO lzo.LzoCodec: Successfully loaded & initialized native-lzo library [hadoop-lzo rev 3afba3037be4152fcc088ddb13f2ec12e659ed9c]
My codec class is class com.hadoop.compression.lzo.LzopCodec and my inputstream class is class com.hadoop.compression.lzo.LzopInputStream
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
-- Local Ok. Let's try HDFS. --
My codec class is class com.hadoop.compression.lzo.LzopCodec and my inputstream class is class com.hadoop.compression.lzo.LzopInputStream
Exception in thread "main" java.lang.InternalError: lzo1x_decompress_safe returned:
at com.hadoop.compression.lzo.LzoDecompressor.decompressBytesDirect(Native Method)
at com.hadoop.compression.lzo.LzoDecompressor.decompress(LzoDecompressor.java:303)
at com.hadoop.compression.lzo.LzopDecompressor.decompress(LzopDecompressor.java:122)
at com.hadoop.compression.lzo.LzopInputStream.decompress(LzopInputStream.java:223)
at org.apache.hadoop.io.compress.DecompressorStream.read(DecompressorStream.java:74)
at java.io.InputStream.read(InputStream.java:85)
at com.twitter.twadoop.jobs.LzoReadTest.main(LzoReadTest.java:51)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
 
 
[dmitriy@sjc1j039 ~]$ ls -la /tmp/statuses_recompressed.lzo
-rw-r--r-- 1 dmitriy employee 418167708 Apr 1 00:54 /tmp/statuses_recompressed.lzo
[dmitriy@sjc1j039 ~]$ hadoop fs -ls statuses_recompressed.lzo
Found 1 items
-rw-r--r-- 3 dmitriy supergroup 418167708 2010-03-31 19:19 /user/dmitriy/statuses_recompressed.lzo

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.