Created
March 31, 2022 21:30
-
-
Save heuermh/7d14e4cf771dd06e7a3c8f8c0ebe6189 to your computer and use it in GitHub Desktop.
Benchmark xz and zstd performance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Benchmark xz and zstd performance.
#
# Compresses the FASTQ file "${SAMPLE}.fq" (expected in the current directory)
# with `xz`, `zstd`, `dsh-bio` and `seqkit`, reports the resulting file sizes,
# then reads every compressed file back through `dsh-bio` and `seqkit`, piping
# the output to `wc -l`. Each step is wrapped in `time`.
#
# Requires on PATH: xz, zstd, dsh-bio, seqkit.

# -x: trace each command so the `time` output can be matched to its command.
# -e: stop at the first failing command.
# -u: treat an unset variable as an error.
# -o pipefail: without it, a failing decompressor in `cmd | wc -l` would be
#              masked by the (successful) exit status of `wc`.
set -x -e -u -o pipefail

# Base name of the input file; the input itself is "${SAMPLE}.fq".
SAMPLE="dataset_C"

# Compress using `xz`
time xz --compress --stdout "${SAMPLE}.fq" > "${SAMPLE}.default.fq.xz"
time xz --compress --stdout -0 "${SAMPLE}.fq" > "${SAMPLE}.0.fq.xz"
time xz --compress --stdout -9 "${SAMPLE}.fq" > "${SAMPLE}.9.fq.xz"
time xz --compress --stdout --extreme "${SAMPLE}.fq" > "${SAMPLE}.extreme.fq.xz"

# Compress using `zstd`
time zstd -1 -k "${SAMPLE}.fq" -o "${SAMPLE}.1.fq.zst"
time zstd -k "${SAMPLE}.fq" -o "${SAMPLE}.default.fq.zst"
time zstd -6 -k "${SAMPLE}.fq" -o "${SAMPLE}.6.fq.zst"
time zstd -19 -k "${SAMPLE}.fq" -o "${SAMPLE}.19.fq.zst"

# Compress using `dsh-bio`, XZ implementation provided by Apache `commons-compress`
# NOTE(review): the output format appears to be chosen from the file extension
# of the -o argument — confirm against the dsh-bio documentation.
time dsh-bio compress-fastq -i "${SAMPLE}.fq" -o "${SAMPLE}.dsh-bio.fq.xz"
time dsh-bio compress-fastq -i "${SAMPLE}.fq" -o "${SAMPLE}.dsh-bio.fq.gz"
time dsh-bio compress-fastq -i "${SAMPLE}.fq" -o "${SAMPLE}.dsh-bio.fq.bgz"
time dsh-bio compress-fastq -i "${SAMPLE}.fq" -o "${SAMPLE}.dsh-bio.fq.bzip2"
time dsh-bio compress-fastq -i "${SAMPLE}.fq" -o "${SAMPLE}.dsh-bio.fq.zst"

# Compress using `seqkit`, https://github.com/ulikunitz/xz, https://github.com/klauspost/compress/tree/master/zstd
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.fq" --out-file "${SAMPLE}.seqkit.fq.xz"
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.fq" --out-file "${SAMPLE}.seqkit.fq.zst"

# Compare file sizes
# The globs are intentionally wider than "${SAMPLE}" so that archives from
# other runs in the same directory are listed too.
xz --list *.xz
zstd -l *.zst
#du -h "${SAMPLE}"*
du -h dataset*

# Decompress and validate files with `dsh-bio` compressed with `xz`
# With no -o argument the output is piped to `wc -l` for a line count.
time dsh-bio compress-fastq -i "${SAMPLE}.0.fq.xz" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.default.fq.xz" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.9.fq.xz" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.extreme.fq.xz" | wc -l

# Decompress and validate files with `dsh-bio` compressed with `zstd`
time dsh-bio compress-fastq -i "${SAMPLE}.1.fq.zst" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.default.fq.zst" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.6.fq.zst" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.19.fq.zst" | wc -l

# Decompress and validate files with `dsh-bio` compressed with `dsh-bio`
time dsh-bio compress-fastq -i "${SAMPLE}.dsh-bio.fq.xz" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.dsh-bio.fq.gz" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.dsh-bio.fq.bgz" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.dsh-bio.fq.bzip2" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.dsh-bio.fq.zst" | wc -l

# Decompress and validate files with `dsh-bio` compressed with `seqkit`
time dsh-bio compress-fastq -i "${SAMPLE}.seqkit.fq.xz" | wc -l
time dsh-bio compress-fastq -i "${SAMPLE}.seqkit.fq.zst" | wc -l

# Decompress and validate files with `seqkit` compressed with `xz`
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.0.fq.xz" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.default.fq.xz" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.9.fq.xz" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.extreme.fq.xz" | wc -l

# Decompress and validate files with `seqkit` compressed with `zstd`
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.1.fq.zst" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.default.fq.zst" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.6.fq.zst" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.19.fq.zst" | wc -l

# Decompress and validate files with `seqkit` compressed with `dsh-bio`
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.dsh-bio.fq.xz" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.dsh-bio.fq.zst" | wc -l

# Decompress and validate files with `seqkit` compressed with `seqkit`
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.seqkit.fq.xz" | wc -l
time seqkit seq --validate-seq --line-width 0 "${SAMPLE}.seqkit.fq.zst" | wc -l
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment