Skip to content

Instantly share code, notes, and snippets.

@vk496
Created October 15, 2020 12:17
Show Gist options
  • Save vk496/fd2a7ecd305a88bea10ad6a8a567266b to your computer and use it in GitHub Desktop.
Save vk496/fd2a7ecd305a88bea10ad6a8a567266b to your computer and use it in GitHub Desktop.
Script to compress NetCDF files
#!/usr/bin/env bash
# Author: Valentin Kivachuk Burda
# External programs looked up at startup; the script aborts if any is missing.
# ncdump, cut, du and nproc are called by the script, so they are listed too.
readonly -a DEPENDS=(nccopy ncdump awk wc grep cut du find nproc)
#https://stackoverflow.com/a/12436838/2757192
#######################################
# Block until fewer than NUM_PROCS background jobs of this shell are running.
# The running-job count is taken from `jobs -pr` and polled every 0.1 s.
# Arguments:
#   $1 - NUM_PROCS: positive decimal integer, the job-count ceiling
# Outputs:
#   Usage or error text on stderr when the argument is missing or invalid
# Returns:
#   0 once the number of running jobs is below NUM_PROCS,
#   1 if NUM_PROCS is missing or is not a positive integer
#######################################
function max_bg_procs {
  if [[ $# -eq 0 ]]; then
    echo "Usage: max_bg_procs NUM_PROCS. Will wait until the number of background (&)" >&2
    echo " bash processes (as determined by 'jobs -pr') falls below NUM_PROCS" >&2
    return 1
  fi

  # A ceiling of 0 (or a non-numeric value, which shell arithmetic reads as 0)
  # can never be satisfied and would spin forever, so reject it up front.
  if [[ ! $1 =~ ^[0-9]+$ ]] || ((10#$1 < 1)); then
    echo "max_bg_procs: NUM_PROCS must be a positive integer, got '$1'" >&2
    return 1
  fi

  # 10# forces base 10 so a value such as 08 is not rejected as bad octal.
  local max_number=$((10#$1))
  local current_number

  while true; do
    current_number=$(jobs -pr | wc -l)
    if [[ $current_number -lt $max_number ]]; then
      break
    fi
    sleep 0.1
  done
}
#######################################
# Deflate-compress one NetCDF file in place unless it already looks compressed.
#
# The header printed by `ncdump -sh` is scanned: lines containing ") ;" are
# counted as variable declarations and lines containing ":_DeflateLevel" as
# variables that already carry a deflate setting. When the two counts differ
# the file is rewritten with `nccopy -d 7` into "<file>.npartial", which is
# moved over the original only if nccopy succeeded.
#
# Arguments:
#   $1 - path of the NetCDF file
#   $2 - position of this file in the batch, for the progress prefix (default 1)
#   $3 - total number of files in the batch (default 1)
# Outputs:
#   One progress line on stdout; error messages on stderr
# Returns:
#   0 if the file was compressed or needed no work, 1 on any failure
#######################################
function process_file {
  local file=$1
  local index=${2:-1}
  local total=${3:-1}
  local partial="$file.npartial"
  local data num_vars num_cvars

  # Assigned separately from `local` so a failing ncdump is not masked;
  # otherwise an unreadable file would count 0 == 0 and be reported as "nop".
  if ! data=$(ncdump -sh "$file"); then
    echo "[$index/$total] $file ERROR (ncdump could not read the header)" >&2
    return 1
  fi

  # grep -c prints 0 (and exits non-zero) when nothing matches; only the
  # printed count is used here.
  num_vars=$(grep -cF ') ;' <<<"$data")
  num_cvars=$(grep -cF ':_DeflateLevel' <<<"$data")

  if [[ $num_vars -eq $num_cvars ]]; then
    echo "[$index/$total] $file nop"
    return 0
  fi

  # Compress
  echo "[$index/$total] $file COMPRESSING ($num_vars vars != $num_cvars compressed vars)...."
  if nccopy -d 7 "$file" "$partial" && mv -- "$partial" "$file"; then
    return 0
  fi

  # Do not leave a half-written copy lying next to the original.
  rm -f -- "$partial"
  echo "[$index/$total] $file ERROR (compression failed)" >&2
  return 1
}
# Abort early when a program listed in DEPENDS cannot be found on PATH.
for prog in "${DEPENDS[@]}"; do
  if ! hash "$prog" 2>/dev/null; then
    echo "Missing $prog dependency" >&2
    exit 1
  fi
done
# Entry point. $1 is either a single NetCDF file or a directory that is
# searched recursively for *.nc files (case-insensitive match).
if [[ $# -lt 1 ]]; then
  echo "Usage: ${0##*/} FILE_OR_DIR" >&2
  exit 1
fi

if [[ -f "$1" ]]; then
  # Single file: report it as 1 of 1 and hand its status back to the caller.
  process_file "$1" 1 1
  exit $?
elif [[ ! -d "$1" ]]; then
  echo "$1 not a dir" >&2
  exit 1
fi

# Build the work list once so the total and the loop see the same files.
# NUL delimiters keep names that contain newlines intact.
files=()
while IFS= read -r -d '' line; do
  files+=("$line")
done < <(find "$1" -iname '*.nc' -print0)
total_files=${#files[@]}

# Job ceiling: the CPU count from nproc, or 1 if nproc is missing or its
# output is not a positive integer.
max_jobs=$(nproc 2>/dev/null) || max_jobs=1
[[ $max_jobs =~ ^[1-9][0-9]*$ ]] || max_jobs=1

curf=0
for line in "${files[@]}"; do
  curf=$((curf + 1)) # progress is 1-based: [1/N] .. [N/N]
  max_bg_procs "$max_jobs"
  if [[ $(du -m "$line" | cut -f1) -lt 500 ]]; then
    # Small enough (under 500 as reported by `du -m`) to run in the background
    process_file "$line" "$curf" "$total_files" &
  else
    # Larger files are processed in the foreground, one at a time
    process_file "$line" "$curf" "$total_files"
  fi
done

# Let every remaining background compression finish before exiting.
wait
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment