Skip to content

Instantly share code, notes, and snippets.

@Slach
Last active November 19, 2023 09:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Slach/dab0dd73c5a8a8cc35ca3fda10e79619 to your computer and use it in GitHub Desktop.
Save Slach/dab0dd73c5a8a8cc35ca3fda10e79619 to your computer and use it in GitHub Desktop.
ClickHouse checksums.txt parser in Bash
# Path of the checksums.txt file to decode (first command-line argument).
CHECKSUM_FILE=${1:-}
# Fail early with a usage hint instead of letting dd/head/tail each complain
# about an empty or unreadable path further down.
if [[ -z "${CHECKSUM_FILE}" || ! -r "${CHECKSUM_FILE}" ]]; then
  printf 'ERROR: usage: %s <checksums.txt> (file must exist and be readable)\n' "${0##*/}" >&2
  exit 1
fi
# A file whose first three bytes are "ENC" is reported as encrypted and skipped.
# The right-hand side is quoted so the bytes read from the file are compared
# literally instead of being interpreted as a glob pattern by [[ == ]].
# NOTE(review): the message goes to stdout and the exit status is 0; both are
# kept unchanged for existing callers.
if [[ "ENC" == "$(dd if="${CHECKSUM_FILE}" bs=1 count=3 2>/dev/null)" ]]; then
  echo "ENCRYPTED FILES don't supported"
  exit 0
fi
# The first line has the form "checksums format version: N"; keep only N.
# `head -n 1` is the portable spelling of the original `head -n +1`.
FORMAT_VERSION=$(head -n 1 "${CHECKSUM_FILE}" | sed 's/checksums format version: //g')
# Print all arguments as a single line on stdout ("$*" joins them with the
# first character of IFS, a space by default).
log() {
  printf '%s\n' "$*"
}

# Print all arguments as a single "ERROR: ..." line on stderr.
error() {
  printf 'ERROR: %s\n' "$*" 1>&2
}

# Report the arguments as an error on stderr, then exit the shell with status 1.
fatal() {
  error "$@"
  exit 1
}
#######################################
# Appends a command to one or more traps, keeping whatever handler is
# already installed for each of them.
# Arguments:
#   $1   - code to add
#   $2.. - names of traps to modify (e.g. EXIT)
# Returns:
#   0 on success; calls fatal (exit 1) when invoked without arguments or
#   when `trap` rejects the new handler.
#######################################
trap_add() {
  # Keep the working variables local so they do not leak into the caller.
  local trap_add_cmd trap_add_name
  trap_add_cmd=$1
  shift || fatal "${FUNCNAME[0]} usage error"
  for trap_add_name in "$@"; do
    trap -- "$(
      # helper fn to get the existing trap command from the output of
      # `trap -p`, which has the shape: trap -- 'COMMAND' NAME
      extract_trap_cmd() { printf '%s\n' "$3"; }
      # print the existing trap command with a newline; eval is needed to
      # undo the shell quoting that `trap -p` applies to COMMAND
      eval "extract_trap_cmd $(trap -p "${trap_add_name}")"
      # print the new trap command
      printf '%s\n' "${trap_add_cmd}"
    )" "${trap_add_name}" \
      || fatal "unable to add to trap ${trap_add_name}"
  done
}
# Write everything after the first line of ${CHECKSUM_FILE} to stdout.
# When ${FORMAT_VERSION} is exactly "4" the data is additionally piped through
# `clickhouse-compressor -d`; for any other value it is passed through as-is.
checksums_body_cmd() {
  case "${FORMAT_VERSION}" in
    4)
      tail -n +2 "${CHECKSUM_FILE}" | clickhouse-compressor -d
      ;;
    *)
      tail -n +2 "${CHECKSUM_FILE}"
      ;;
  esac
}
# 1-based position of the next unread byte of the checksums body.
declare -g CURRENT_OFFSET=1
# Scratch file used to hand the updated offset back to this shell:
# read_uvarint is run inside pipelines / command substitutions, so its change
# to CURRENT_OFFSET is lost and has to be re-read from this file.
# Despite the name it is a regular file, not a named pipe.
# mktemp creates the file itself (atomically, owner-only) instead of the
# racy "mktemp -u" + touch pair.
CURRENT_OFFSET_FIFO=$(mktemp) || fatal "unable to create temporary file"
trap_add 'rm -f -- "${CURRENT_OFFSET_FIFO}"' EXIT
#######################################
# Decode one unsigned base-128 varint: each byte contributes its low 7 bits,
# least-significant group first, and the first byte below 0x80 ends the value.
# At most 10 bytes are consumed.
# Globals:
#   CURRENT_OFFSET       (read/written) incremented once per byte consumed
#   CURRENT_OFFSET_FIFO  (read) path of the file that receives the offset
# Input:
#   stdin - one byte per line, as a decimal number
# Outputs:
#   the decoded value, in decimal, on stdout
# Returns:
#   0 on success, and also when stdin is already at end of input (prints 0).
#   Calls fatal (exit 1) when input ends in the middle of a value or the
#   value does not fit in 10 bytes / 64 bits.
# NOTE: shell arithmetic is fixed-width and signed, so a value with the top
# bit set is presumably printed as a negative number.
#######################################
read_uvarint() {
  # Everything is local: the original `readonly` globals made a second call in
  # the same shell fail, and the loop index clobbered the caller's `i`.
  local -r max_len=10
  local value=0 shift_bits=0 idx byte_value
  for ((idx = 0; idx < max_len; idx++)); do
    byte_value=''
    read -r byte_value
    if [[ -z "${byte_value}" ]]; then
      # End of input: no byte was consumed, so publish the offset unchanged
      # (the caller always re-reads it from the file).
      printf '%s\n' "${CURRENT_OFFSET}" > "${CURRENT_OFFSET_FIFO}"
      if ((idx > 0)); then
        fatal "Error: unexpected end of file"
      fi
      printf '%s\n' "${value}"
      return 0
    fi
    CURRENT_OFFSET=$((CURRENT_OFFSET + 1))
    printf '%s\n' "${CURRENT_OFFSET}" > "${CURRENT_OFFSET_FIFO}"
    if ((byte_value < 0x80)); then
      # Final byte. In the 10th position only bit 0 still fits into 64 bits.
      if ((idx == max_len - 1 && byte_value > 1)); then
        fatal "Error: overflow"
      fi
      printf '%s\n' "$((value | (byte_value << shift_bits)))"
      return 0
    fi
    value=$((value | ((byte_value & 0x7f) << shift_bits)))
    shift_bits=$((shift_bits + 7))
  done
  # Ten continuation bytes in a row: show what was accumulated, then abort.
  printf '%s\n' "${value}" >&2
  fatal "Error: overflow"
}
# Spool the checksums body into a temp file so it can be re-read at arbitrary
# byte offsets.
TEMP_CHECKSUM_BODY=$(mktemp) || fatal "unable to create temporary file"
trap_add 'rm -f "${TEMP_CHECKSUM_BODY}"' EXIT
checksums_body_cmd > "${TEMP_CHECKSUM_BODY}" \
  || fatal "unable to read checksums body from ${CHECKSUM_FILE}"

# Print $1 raw bytes of the body, starting at the 1-based CURRENT_OFFSET.
body_bytes() {
  dd if="${TEMP_CHECKSUM_BODY}" bs=1 skip="$((CURRENT_OFFSET - 1))" count="$1" 2>/dev/null
}

# Decode the uvarint that starts at CURRENT_OFFSET and print its value.
# Only 10 bytes are fed in because read_uvarint never consumes more than that.
# read_uvarint runs in a pipeline subshell here, so the caller must re-read
# the advanced offset from CURRENT_OFFSET_FIFO afterwards.
uvarint_at_offset() {
  body_bytes 10 | hexdump -v -e '/1 "%u\n"' | read_uvarint
}

# The body starts with the number of records.
ITEMS_COUNT=$(uvarint_at_offset) || fatal "unable to read the item count"
read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"

# Each record: name length (uvarint), name, file size (uvarint), 16-byte hash,
# 1-byte flag; when the flag is not 00, an uncompressed size (uvarint) and a
# second 16-byte hash follow. Only "name=hash" is printed.
for ((i = 1; i <= ITEMS_COUNT; i++)); do
  NAME_LENGTH=$(uvarint_at_offset) || fatal "unable to read the name length of item ${i}"
  read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"
  NAME=$(body_bytes "${NAME_LENGTH}")
  CURRENT_OFFSET=$((CURRENT_OFFSET + NAME_LENGTH))

  FILE_SIZE=$(uvarint_at_offset) || fatal "unable to read the file size of item ${i}"
  read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"
  FILE_HASH=$(body_bytes 16 | xxd -ps -c 32)
  CURRENT_OFFSET=$((CURRENT_OFFSET + 16))

  IS_COMPRESSED=$(body_bytes 1 | xxd -p)
  CURRENT_OFFSET=$((CURRENT_OFFSET + 1))
  if [[ "00" != "${IS_COMPRESSED}" ]]; then
    UNCOMPRESSED_SIZE=$(uvarint_at_offset) \
      || fatal "unable to read the uncompressed size of item ${i}"
    read -r CURRENT_OFFSET < "${CURRENT_OFFSET_FIFO}"
    UNCOMPRESSED_HASH=$(body_bytes 16 | xxd -ps -c 32)
    CURRENT_OFFSET=$((CURRENT_OFFSET + 16))
  fi

  printf '%s\n' "${NAME}=${FILE_HASH}"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment