Skip to content

Instantly share code, notes, and snippets.

@chetan
Last active August 11, 2023 12:38
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save chetan/6524829 to your computer and use it in GitHub Desktop.
Save chetan/6524829 to your computer and use it in GitHub Desktop.
List the available hadoop codecs
#!/usr/bin/env bash
# list_hadoop_codecs.sh
#
# Prints the output of `hadoop checknative`, then compiles and runs a small
# Java program that lists every codec class returned by
# CompressionCodecFactory.getCodecClasses() for a default Configuration,
# together with its short name and default file extension.
#
# USAGE:
# curl -sL https://gist.github.com/chetan/6524829/raw/list_hadoop_codecs.sh | bash

# make sure hadoop is available (command -v is a builtin; no dependency on `which`)
if ! command -v hadoop >/dev/null 2>&1; then
  echo "hadoop command not found!"
  exit 1
fi

hadoop checknative 2>/dev/null

CP=$(hadoop classpath)
JLIB="-Djava.library.path=/usr/lib/hadoop/lib/native:/usr/lib/hbase/lib/native/Linux-amd64-64"

# Build in a private scratch directory instead of a fixed file name in the
# shared /tmp, and remove it on exit even if a later step fails.
WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/list_hadoop_codecs.XXXXXX") || exit 1
trap 'rm -rf "$WORKDIR"' EXIT
cd "$WORKDIR" || exit 1

# The quoted delimiter ('EOF') disables all shell expansion inside the heredoc,
# and writing it straight to the file avoids the unquoted `echo $J`, which
# word-split the source and glob-expanded tokens such as the bare `?` of the
# ternary operator against the files in the current directory.
cat > ListCodecs.java <<'EOF'
import java.util.List;
import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

/** Prints one line per configured codec: short name, default extension, class name. */
public class ListCodecs {
  public static void main(String[] args) throws Exception {
    List<Class<? extends CompressionCodec>> codecClasses =
        CompressionCodecFactory.getCodecClasses(new Configuration());
    System.out.println("\navailable codecs:\n");
    for (Class<? extends CompressionCodec> clazz : codecClasses) {
      // Short name is the simple class name minus a trailing "Codec", lower-cased;
      // it stays empty for classes that do not follow that naming.
      String str = "";
      String codecName = clazz.getSimpleName();
      if (codecName.endsWith("Codec")) {
        codecName = codecName.substring(0, codecName.length() - "Codec".length());
        // Locale.ROOT keeps the result stable regardless of the JVM's default locale.
        str = codecName.toLowerCase(Locale.ROOT);
      }
      // Class.newInstance() is deprecated; go through the no-arg constructor instead.
      String ext = clazz.getDeclaredConstructor().newInstance().getDefaultExtension();
      // Extra tab for short extensions so the class-name column lines up.
      str = str + "\t\t" + ext + "\t" + (ext.length() < 8 ? "\t" : "") + clazz.getName();
      System.out.println(str);
    }
    System.out.println("\n");
  }
}
EOF

# Quote the classpath: it usually contains `*` wildcard entries (which javac/java
# expand themselves) and may contain spaces.
if ! javac -cp "$CP" ListCodecs.java; then
  echo "failed to compile ListCodecs.java" >&2
  exit 1
fi

# stderr is discarded to hide Hadoop's logging noise, as before.
java "$JLIB" -cp ".:$CP" ListCodecs 2>/dev/null
$ curl -sL https://gist.github.com/chetan/6524829/raw/list_hadoop_codecs.sh | bash
Native library checking:
hadoop: true
zlib: true
snappy: true
lz4: true
available codecs:
bzip2 .bz2 org.apache.hadoop.io.compress.BZip2Codec
default .deflate org.apache.hadoop.io.compress.DefaultCodec
deflate .deflate org.apache.hadoop.io.compress.DeflateCodec
gzip .gz org.apache.hadoop.io.compress.GzipCodec
lz4 .lz4 org.apache.hadoop.io.compress.Lz4Codec
snappy .snappy org.apache.hadoop.io.compress.SnappyCodec
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment