Skip to content

Instantly share code, notes, and snippets.

@heuermh
Created March 31, 2022 21:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save heuermh/7d14e4cf771dd06e7a3c8f8c0ebe6189 to your computer and use it in GitHub Desktop.
Save heuermh/7d14e4cf771dd06e7a3c8f8c0ebe6189 to your computer and use it in GitHub Desktop.
Benchmark xz and zstd performance
#!/bin/bash
# Benchmark compression time, compressed size, and cross-tool readability of a
# FASTQ file using xz, zstd, dsh-bio, and seqkit.
#
# Shell options:
#   -x           trace each command, so every `time` report can be matched to
#                the command that produced it
#   -e           abort on the first failing command
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails when any stage fails; without it, the
#                `<tool> ... | wc -l` validation pipelines further down take
#                their status from `wc` alone and a decoder failure would go
#                unnoticed
set -x -e -u -o pipefail
# Basename (without the `.fq` extension) of the FASTQ file to benchmark; the
# input and output file names used below are derived from it.
SAMPLE="dataset_C"
# Compress using `xz`: default preset, lowest preset (-0), highest preset (-9),
# and --extreme (no level is given alongside it, so per the xz manual it
# modifies the default preset).
# The compressed stream is written via --stdout and a shell redirect, so the
# input file is left in place and an existing output file is overwritten.
# Expansions are quoted so a sample name containing whitespace or glob
# characters stays a single argument.
time xz --compress --stdout "${SAMPLE}.fq" > "${SAMPLE}.default.fq.xz"
time xz --compress --stdout -0 "${SAMPLE}.fq" > "${SAMPLE}.0.fq.xz"
time xz --compress --stdout -9 "${SAMPLE}.fq" > "${SAMPLE}.9.fq.xz"
time xz --compress --stdout --extreme "${SAMPLE}.fq" > "${SAMPLE}.extreme.fq.xz"
# Compress using `zstd` at levels 1, default, 6, and 19.
#   -k  keep the input file after compression
#   -f  overwrite an existing output file without prompting; without it a
#       re-run of this script stops at zstd's overwrite confirmation (or fails
#       outright when stdin is not a terminal), which would distort or abort
#       the timing. The `xz` runs already overwrite their outputs through the
#       shell redirect, so this makes both tools behave the same on re-runs.
# Expansions are quoted so a sample name containing whitespace or glob
# characters stays a single argument.
time zstd -1 -k -f "${SAMPLE}.fq" -o "${SAMPLE}.1.fq.zst"
time zstd -k -f "${SAMPLE}.fq" -o "${SAMPLE}.default.fq.zst"
time zstd -6 -k -f "${SAMPLE}.fq" -o "${SAMPLE}.6.fq.zst"
time zstd -19 -k -f "${SAMPLE}.fq" -o "${SAMPLE}.19.fq.zst"
# Compress using `dsh-bio`, XZ implementation provided by Apache `commons-compress`
# The same command is timed once per output extension, in the order
# xz, gz, bgz, bzip2, zst.
# NOTE(review): dsh-bio presumably picks the codec from the output file
# extension, since nothing else differs between the runs -- confirm.
for ext in xz gz bgz bzip2 zst; do
  time dsh-bio compress-fastq -i "${SAMPLE}.fq" -o "${SAMPLE}.dsh-bio.fq.${ext}"
done
# Compress using `seqkit`, https://github.com/ulikunitz/xz, https://github.com/klauspost/compress/tree/master/zstd
# The same `seqkit seq` command is timed once per output extension (xz, zst);
# only the --out-file name differs between the runs.
# NOTE(review): seqkit presumably selects the codec from the --out-file
# extension -- confirm.
for ext in xz zst; do
  time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.fq" --out-file "${SAMPLE}.seqkit.fq.${ext}"
done
# Compare file sizes
# Report on every .xz file in the current directory; the glob is not limited
# to files derived from $SAMPLE.
xz --list *.xz
# Likewise for every .zst file in the current directory.
zstd -l *.zst
# Disabled variant that would restrict the disk-usage report to the current
# sample:
#du -h $SAMPLE*
# Disk usage of every file whose name starts with "dataset".
# NOTE(review): the hard-coded prefix presumably exists to compare several
# datasets side by side -- confirm before replacing it with $SAMPLE.
du -h dataset*
# Decompress and validate files with `dsh-bio`.
# Each file is read back with `dsh-bio compress-fastq` (no -o, so the records
# go to stdout) and the lines are counted with `wc -l`; the whole pipeline is
# timed.
# NOTE: the exit status of each pipeline is that of `wc -l` unless the shell
# option `pipefail` is enabled, so a failure of the reading tool only aborts
# the script when `pipefail` is set.
# File name suffixes (appended to "$SAMPLE.") in the order they are checked:
dsh_bio_inputs=(
  # compressed with `xz`
  0.fq.xz default.fq.xz 9.fq.xz extreme.fq.xz
  # compressed with `zstd`
  1.fq.zst default.fq.zst 6.fq.zst 19.fq.zst
  # compressed with `dsh-bio`
  dsh-bio.fq.xz dsh-bio.fq.gz dsh-bio.fq.bgz dsh-bio.fq.bzip2 dsh-bio.fq.zst
  # compressed with `seqkit`
  seqkit.fq.xz seqkit.fq.zst
)
for suffix in "${dsh_bio_inputs[@]}"; do
  time dsh-bio compress-fastq -i "${SAMPLE}.${suffix}" | wc -l
done
# Decompress and validate files with `seqkit`.
# Each file is read back with `seqkit seq --validate-seq --line-width 0`
# (no --out-file, so the records go to stdout) and the lines are counted with
# `wc -l`; the whole pipeline is timed.
# NOTE: the exit status of each pipeline is that of `wc -l` unless the shell
# option `pipefail` is enabled, so a failure of the reading tool only aborts
# the script when `pipefail` is set.
# File name suffixes (appended to "$SAMPLE.") in the order they are checked:
seqkit_inputs=(
  # compressed with `xz`
  0.fq.xz default.fq.xz 9.fq.xz extreme.fq.xz
  # compressed with `zstd`
  1.fq.zst default.fq.zst 6.fq.zst 19.fq.zst
  # compressed with `dsh-bio`
  dsh-bio.fq.xz dsh-bio.fq.zst
  # compressed with `seqkit`
  seqkit.fq.xz seqkit.fq.zst
)
for suffix in "${seqkit_inputs[@]}"; do
  time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.${suffix}" | wc -l
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment