Skip to content

Instantly share code, notes, and snippets.

@pstch
Last active July 18, 2023 11:35
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pstch/6bb2c408aebbd3f3328052b07a0c3d53 to your computer and use it in GitHub Desktop.
Save pstch/6bb2c408aebbd3f3328052b07a0c3d53 to your computer and use it in GitHub Desktop.
Pipe a file to a process "in-place"
#!/usr/bin/env bash
# stream-replace.sh -- pipe a file to a process "in-place", using fallocate
#
# Copyright (C) 2020 Hugo Geoffroy "pistache" <pistache@lebib.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# NOTE: This script is non-portable and requires:
# - Linux (because we use fallocate() with FALLOC_FL_PUNCH_HOLE)
# - bash 4.4 or later (to wait on a process substitution)
#
# The script can be rewritten to not require bash 4.4, using an
# explicit named pipe instead of the process substitution.
# Enable exit-on-error for script initialization
set -e

# Succeed when bash MAJOR.MINOR is at least 4.4, the release named in the
# header as required to wait on a process substitution.
# Arguments: $1 - major version (default: the running shell's)
#            $2 - minor version (default: the running shell's)
_bash_version_ok() {
  local major="${1:-${BASH_VERSINFO[0]}}"
  local minor="${2:-${BASH_VERSINFO[1]}}"
  (( major > 4 || (major == 4 && minor >= 4) ))
}

# Check for compatible bash version; refuse to run on anything older, since
# the exit status of COMMAND could not be collected reliably
if ! _bash_version_ok; then
  echo "$0: unsupported bash version (need 4.4+)" >&2
  exit 1
fi
# Succeed when $1 is a plain positive decimal integer (no sign, no leading
# zero, no unit suffix). The block size is passed to dd and is also used in
# shell arithmetic, so it has to be valid for both.
_is_positive_int() {
  [[ "$1" =~ ^[1-9][0-9]*$ ]]
}

# Parse blocksize argument
if [[ "${1:-}" == "-b" ]]; then
  # Reject a missing or malformed value up front: with a bad block size the
  # first read fails and FILE could end up replaced by an empty output
  if (( $# < 2 )) || ! _is_positive_int "$2"; then
    echo "$0: -b requires a positive integer BLOCKSIZE (in bytes)" >&2
    exit 1
  fi
  bs="$2"
  shift 2
else
  # Default block size: 1 MiB
  bs=1048576
fi
# Show usage message (on stderr, as this is an invocation error) when FILE
# and COMMAND are not both present
if (( $# < 2 )); then
  cat >&2 <<EOF
Usage: $0 [-b BLOCKSIZE] FILE COMMAND

Use this script to overwrite FILE block-by-block
by passing its contents through COMMAND, while
unallocating processed blocks from FILE.

This can be used to process a file in place,
when this file will not fit in available memory,
and when there is not enough free space to make a copy.

If not provided, BLOCKSIZE will be set to 1MiB.

WARNING: if COMMAND doesn't do anything with the data
         sent to its standard input, FILE will be
         truncated.

NOTE: if COMMAND fails during processing, only some part
      of FILE will have been processed, although
      the last block before the failure will be
      preserved untouched, and script will report
      the amount of processed bytes.
EOF
  exit 1
fi
# Parse input file argument; the remaining positional parameters are COMMAND
input="$1"
shift
# Test that the input exists and is a regular file: directories and special
# files cannot be read and hole-punched block by block
if [[ ! -e "$input" ]]; then
  echo "$0: $input: No such file or directory" >&2
  exit 1
elif [[ ! -f "$input" ]]; then
  echo "$0: $input: Not a regular file" >&2
  exit 1
fi
# Create the temporary output file in the same directory as the input file,
# named after it with a random suffix
target_name=$(basename "$input")
target_dir=$(dirname "$input")
output=$(mktemp -p "$target_dir" "${target_name}.XXXXXXXXXX")
# Loop state: input size in bytes, index of the next block to read, and the
# byte offset that index corresponds to
size=$(wc -c <"$input")
i=0
offset=0
# From here on failures are handled explicitly, so turn exit-on-error off
set +e
# Main loop: feed the input file to COMMAND one block at a time. The loop's
# stdout is a process substitution running COMMAND ("$@"), whose own stdout
# goes to the temporary output file.
read_failed=0
while (( offset < size )); do
  # write block number $i ($bs bytes) of the input file to COMMAND's stdin
  dd if="$input" bs="$bs" skip="$i" count=1 status=none
  dd_status=$?
  if (( dd_status != 0 )); then
    # 141 (128 + SIGPIPE) means COMMAND stopped reading; its own exit status
    # then decides the outcome. Any other failure means the block could not
    # be read, so the run must not be treated as a success, otherwise a
    # partial output would replace the input file.
    (( dd_status == 141 )) || read_failed=1
    break
  fi
  # deallocate the block *before* the one just read, so that the most
  # recently read block is still intact if COMMAND fails
  if (( i > 0 )); then
    fallocate -p -o "$(( bs * (i - 1) ))" -l "$bs" "$input"
  fi
  # increment counter and calculate new offset
  offset=$(( bs * ++i ))
done > >( "$@" > "$output" )
# $! is the process substitution; wait for COMMAND and collect its status
wait "$!"
retcode=$?
# A read failure is an error even when COMMAND itself exited successfully
if (( read_failed && retcode == 0 )); then
  retcode=1
fi
# Handle return code
if [[ "$retcode" -eq 0 ]]; then
  # success: replace input file by output file. The output file was created
  # by mktemp with its own restrictive mode, so copy the input file's
  # permissions onto it first; otherwise the rename would silently change them
  if ! chmod --reference="$input" -- "$output"; then
    echo "$0: warning: could not preserve the permissions of $input" >&2
  fi
  if ! mv -f -- "$output" "$input"; then
    # keep the output file (it holds the processed data) and report failure
    retcode=1
    echo "$0: could not replace $input; processed data left in $output" >&2
  fi
else
  # error: put back processed bytes in input file ($i blocks, written over
  # the start of the input without truncating it)
  dd if="$output" of="$input" bs="$bs" count="$i" conv=notrunc status=none
  # delete output file
  rm -f -- "$output"
  # print error messages
  echo "### Command $* failed with exit code $retcode ($offset bytes processed)" >&2
fi
exit "$retcode"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment