Skip to content

Instantly share code, notes, and snippets.

@rolfen
Last active February 29, 2024 11:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rolfen/c76f713ac75464b798a7c2e3398ccba5 to your computer and use it in GitHub Desktop.
Save rolfen/c76f713ac75464b798a7c2e3398ccba5 to your computer and use it in GitHub Desktop.
Extract previews and metadata from original photos and videos. Archive and backup.
#!/bin/bash
# Import and organize into subfolders by date taken (YYYY/MM/DD).
#
# Usage: <script> /source/dir /archive/dir
#
# exiftool is started from inside the archive dir, so the relative
# %Y/%m/%d value assigned to "Directory" is resolved against the archive.
# Because of that `cd`, a relative /source/dir is also resolved against the
# archive dir rather than the caller's working directory.
scriptname=$(basename "$0")
if [[ $# -ne 2 ]]; then
  echo "Usage: $scriptname /source/dir /archive/dir"
else
  # `&&` rather than `;`: when the archive dir cannot be entered, exiftool must
  # not run at all, otherwise it would reorganise the files into whatever the
  # current directory happens to be.
  (cd "$2" && exiftool -progress -ext MTS -ext ARW -ext ORF -ext xmp -r -m -d %Y/%m/%d "-Directory<DateTimeOriginal" "$1")
fi

Just create a new folder in the collections folder and copy selected previews in there to create a new collection.

Run a tool periodically to replace the copies with hardlinks, for example:

finddupe.exe -hardlink -ref ./previews ./collections/
# Manual procedure for building previews from a remote archive
# (or just use the script remote-previews).
# Step 1
# Copy the originals from the b2 remote to a local directory.
# Example:
rclone copy b2ro:Droppit/ArchivePhoto/2021/04 /home/rolf/photo/archive/2021/04 --progress
# Step 2
# Use previews.bash to extract previews from the originals into another directory.
# Run it again to make sure that all previews were extracted.
# You may also want to run previews-rawdigest and previews-vid in the same way.
# Step 3
# Replace the originals with placeholders.
# Each file name reaches the inner bash as $0; `echo >` overwrites the file
# with a single newline, so the placeholder is 1 byte long rather than 0.
find . -name "*.ARW" -type f -exec bash -c 'echo > "${0}"' {} \;
find . -name "*.ORF" -type f -exec bash -c 'echo > "${0}"' {} \;
# Also do it for MTS or any other format that you handled.
# Go back to step 1 and repeat until all files are copied, i.e. until the rclone command in step 1 finishes its checks with 0 transfers.
#!/bin/bash
# todo: check that temporary output file is removed on Ctrl-c interruption
# limitations: does not support spaces in dirnames
# requires md5sum
scriptname=$(basename "$0")
# Counters reported in the final summary line.
processed=0
failed=0
skipped=0
# Suffix appended to a source file name to form its checksum file name.
ext="md5.txt"
# Print the usage text of the md5 script to stdout.
# Reads the global $scriptname for the synopsis line.
Help() {
  printf '%s\n' \
    "Recursively calculate md5sums" \
    "" \
    "Syntax: $scriptname [-s|d|h]" \
    "options:" \
    " -s Source dir (without trailing /)." \
    " -d Destination dir (without trailing /)." \
    " -h Print this Help." \
    ""
}
# Get the options
while getopts "hs:d:" option; do
  case $option in
    h) # display Help
      Help
      exit;;
    s) # source directory
      srcdir=$OPTARG;;
    d) # destination directory
      destdir=$OPTARG;;
    \?) # unknown option or missing argument; getopts has printed the details
      echo "Error: Invalid option" >&2
      exit 1;;
  esac
done
# Ask interactively for whichever directory was not given on the command line.
# -r keeps backslashes in the typed path intact.
if [[ ! -v srcdir ]]; then
  echo "Enter source"
  read -r srcdir
fi
if [[ ! -v destdir ]]; then
  echo "Enter destination"
  read -r destdir
fi
# Write "<destdir>/<relative path>.<ext>" holding the md5 of every file found
# under $srcdir. Existing checksum files are kept and counted as skipped.
# Globals: srcdir, destdir, ext (read); processed, failed, skipped (updated).

# An empty or wrong source would otherwise make the walk start somewhere
# unintended, so refuse to continue.
if [[ ! -d "$srcdir" ]]; then
  echo "Error: source dir '$srcdir' is not a directory" >&2
  exit 1
fi

# NUL-delimited list so that paths containing whitespace survive intact.
# -size +1 is what leaves out empty/placeholder files.
while IFS= read -r -d '' trgt; do
  sumfile="$destdir/$trgt.$ext"
  echo "$srcdir/$trgt >> $sumfile"
  mkdir -p -- "$(dirname -- "$sumfile")"
  if [ ! -f "$sumfile" ]; then
    # Hash via stdin so md5sum prints "<hash> *-"; keep the hash only, written
    # without a trailing newline as before. The output file is only created
    # once the hash exists, so a failure cannot leave an empty checksum file
    # that later runs would skip.
    if sum=$(md5sum -b < "$srcdir/$trgt") && printf '%s' "${sum%% *}" > "$sumfile"; then
      processed=$((processed+1))
    else
      rm -f -- "$sumfile"
      failed=$((failed+1))
    fi
  else
    skipped=$((skipped+1))
  fi
done < <(cd "$srcdir" && find . -type f -size +1 -print0)
echo "Processed: $processed. Failed: $failed. Skipped: $skipped."
MISC NOTES:
# This is a nice way to extract exif as xml:
exiftool -a -X -n ./2023/06/15/DSC00259.ARW
# Delete in-camera JPGs (we assume that a JPG is an in-camera JPEG if it sits next to a RAW with the same base name).
# The file names are handed to sh as arguments instead of being spliced into the
# command text, so paths with spaces or quotes are safe; ${raw%.*} drops the
# extension whatever its letter case, matching what -iname selected.
find . -iname "*.ARW" -exec sh -c 'for raw; do rm -f -- "${raw%.*}.JPG"; done' sh {} +
TODO:
- Test how the scripts handle Ctrl-C or being killed; specifically, test whether incomplete files get written
- Test consistency of checksums across platforms
- previews-rawdigest on linux dumps garbage into the shell
#!/bin/bash
# requires djpeg, pnmscalefixed and cjpeg
scriptname=$(basename "$0")
# Counters reported in the final summary line.
processed=0
failed=0
# Flags follow the shell convention: 0 = on, 1 = off.
quiet=1      # off by default; -q sets it to 0
skipped=0
overwrite=1  # off by default; -o sets it to 0
# Print the usage text of the JPEG resampling script to stdout.
# Reads the global $scriptname for the synopsis line.
Help()
{
  echo "Recursively resample JPEGs"
  echo
  # The synopsis lists -o, which the option list below documents.
  echo "Syntax: $scriptname [-s|d|q|o|h]"
  echo "options:"
  echo " -s Source dir (without trailing /)."
  echo " -d Destination dir (without trailing /)."
  echo " -q Quiet mode."
  echo " -o Overwrite."
  echo " -h Print this Help."
  echo
}
# Get the options
while getopts "hqos:d:" option; do
  case $option in
    h) # display Help
      Help
      exit;;
    s) # source directory
      srcdir=$OPTARG;;
    d) # destination directory
      destdir=$OPTARG;;
    q) # quiet mode (0 = on)
      quiet=0;;
    o) # overwrite existing output (0 = on)
      overwrite=0;;
    \?) # unknown option or missing argument; getopts has printed the details
      echo "Error: Invalid option" >&2
      exit 1;;
  esac
done
# Ask interactively for whichever directory was not given on the command line.
# -r keeps backslashes in the typed path intact.
if [[ ! -v srcdir ]]; then
  echo "Enter source"
  read -r srcdir
fi
if [[ ! -v destdir ]]; then
  echo "Enter destination"
  read -r destdir
fi
# Resample every *.JPG / *.JPEG found under $srcdir into the same relative
# path under $destdir. Existing outputs are skipped unless overwrite is on.
# Globals: srcdir, destdir, quiet, overwrite (read);
#          processed, failed, skipped (updated).
# IFS= and -r keep leading/trailing blanks and backslashes of file names.
while IFS= read -r -d '' trgt; do
  out="$destdir/$trgt"
  if [ "$quiet" -eq 1 ]; then
    echo "$srcdir/$trgt >> $out"
  fi
  mkdir -p -- "$(dirname -- "$out")"
  if [ ! -f "$out" ] || [ "$overwrite" -eq 0 ]; then
    # pipefail (confined to a subshell) makes a djpeg or pnmscalefixed failure
    # count, not just the exit status of the final cjpeg.
    if ( set -o pipefail
         djpeg -fast "$srcdir/$trgt" | pnmscalefixed -pixels 2000000 | cjpeg -quality 75 > "$out" ); then
      processed=$((processed+1))
    else
      # Drop the broken output so that a later run retries instead of skipping.
      rm -f -- "$out"
      failed=$((failed+1))
    fi
  else
    skipped=$((skipped+1))
  fi
done < <(cd "$srcdir" && find . -type f -size +1 \( -iname \*.JPG -o -iname \*.JPEG \) -print0 )
echo "Processed: $processed. Failed: $failed. Skipped: $skipped."
#!/bin/bash
scriptname=$(basename "$0")
# Flags follow the shell convention: 0 = on, 1 = off.
recalculate=0 # default: on (every digest is recomputed); -c sets it to 1
quiet=1       # default: off; -q sets it to 0
# Counters reported in the summary line.
processed=0
failed=0
skipped=0
# Extra exiftool switches as one space-separated string; filled in by -q.
exiftool_extra_parms=""
# todo:
# performance: list1 = originals, list2 = previews which already have a rawdigest;
# list3 = list1 - list2 is what we want to process if -c is set.
# But I guess first we need to write some tests.
# Print the usage text of the raw-digest script to stdout.
# Reads the global $scriptname for the synopsis line.
Help()
{
  # This script stores a digest of each raw image in its existing preview;
  # it does not extract previews itself.
  echo "Recursively store raw image digests in previews"
  echo
  echo "Syntax: $scriptname [-s|d|c|h|q]"
  echo
  echo "options:"
  echo " -s Source dir"
  echo " -d Destination dir"
  echo " -c Skip targets that already have a checksum"
  echo " -q Quiet/batch mode"
  echo " -h Print this Help"
  echo
}
# Get the options
while getopts "s:d:chq" option; do
  case $option in
    h) # display Help
      Help
      exit;;
    s) # source directory
      srcdir=$OPTARG;;
    d) # destination directory
      destdir=$OPTARG;;
    c) # "continue mode", skip targets which already have a checksum
      recalculate=1;;
    q) # quiet/batch mode: pass quieting switches to exiftool as well
      exiftool_extra_parms=" -m -q -q "
      quiet=0;;
    \?) # unknown option or missing argument; getopts has printed the details
      echo "Error: Invalid option" >&2
      exit 1;;
  esac
done
# Ask interactively for whichever directory was not given on the command line.
# -r keeps backslashes in the typed path intact.
if [[ ! -v srcdir ]]; then
  echo "Enter source"
  read -r srcdir
fi
if [[ ! -v destdir ]]; then
  echo "Enter destination"
  read -r destdir
fi
# For every raw file under $srcdir, compute the md5 of the file with all
# metadata stripped and store it as the rawimagedigest tag of the preview
# "$destdir/<relative path>.JPG".
# Globals: srcdir, destdir, quiet, recalculate, exiftool_extra_parms (read);
#          processed, failed, skipped (updated).
#
# The file list arrives on fd 3, not stdin: the "press any key" prompt below
# reads stdin, and would otherwise swallow the pending file names.
while IFS= read -r -d '' trgt <&3; do
  preview="$destdir/$trgt.JPG"
  if [ "$quiet" -eq 1 ]; then
    echo "$srcdir/$trgt >> $preview"
  fi
  skip=1 # 1 = process this target, 0 = skip it
  if [ "$recalculate" -eq 1 ] && [ -f "$preview" ]; then
    # target present and recalculate is off: skip if we already have a checksum
    # shellcheck disable=SC2086 # extra parms are meant to split into words
    if [ -n "$(exiftool $exiftool_extra_parms -rawimagedigest "$preview")" ]; then
      skip=0
    fi
  fi
  if [ "$skip" -eq 1 ]; then
    mkdir -p -- "$(dirname -- "$destdir/$trgt")"
    ok=0
    # The trailing `exit` makes the substitution report exiftool's status
    # rather than that of `cut`.
    # shellcheck disable=SC2086
    rawdigest=$(exiftool $exiftool_extra_parms "$srcdir/$trgt" -all= -o - | md5sum | cut -d ' ' -f 1 ; exit "${PIPESTATUS[0]}") || ok=1
    # Write the tag only when the digest is valid, in quiet mode too; a failed
    # read would otherwise store the md5 of truncated or empty data.
    if [ "$ok" -eq 0 ]; then
      # shellcheck disable=SC2086
      exiftool $exiftool_extra_parms -overwrite_original_in_place "$preview" -rawimagedigest="$rawdigest" || ok=1
    fi
    if [ "$ok" -eq 0 ]; then
      processed=$((processed+1))
    else
      failed=$((failed+1))
      if [ "$quiet" -eq 1 ]; then
        echo "Processed: $processed. Failed: $failed. Skipped: $skipped"
        echo "Failure, press Ctrl-C to quit or any key to continue"
        read -r garbage
      fi
    fi
  else
    skipped=$((skipped+1))
  fi
done 3< <(cd "$srcdir" && find . -type f -size +1 \( -iname \*.ORF -o -iname \*.ARW -o -iname \*.DNG \) -print0 )
echo "Processed: $processed. Failed: $failed. Skipped: $skipped"
#!/bin/bash
scriptname=$(basename "$0")
# Counters reported in the summary line.
processed=0
skipped=0
failed=0
# Shell-style flag: 0 = batch mode on (set by -b), 1 = off.
batchmode=1
# Print the usage text of the video preview script to stdout.
# Reads the global $scriptname for the synopsis line.
Help()
{
  echo "Recursively extract previews from videos"
  echo
  echo "Syntax: $scriptname [-s|d|b|h]"
  echo "options:"
  echo " -s Source dir (without trailing /)."
  echo " -d Destination dir (without trailing /)."
  # -b appears in the synopsis, so it is described here too.
  echo " -b Batch mode: do not print file names, keep going after a failure."
  echo " -h Print this Help."
  echo
}
# Get the options
while getopts "hbs:d:" option; do
  case $option in
    h) # display Help
      Help
      exit;;
    b) # batch mode (0 = on)
      batchmode=0;;
    s) # source directory
      srcdir=$OPTARG;;
    d) # destination directory
      destdir=$OPTARG;;
    \?) # unknown option or missing argument; getopts has printed the details
      echo "Error: Invalid option" >&2
      exit 1;;
  esac
done
# Ask interactively for whichever directory was not given on the command line.
# -r keeps backslashes in the typed path intact.
if [[ ! -v srcdir ]]; then
  echo "Enter source"
  read -r srcdir
fi
if [[ ! -v destdir ]]; then
  echo "Enter destination"
  read -r destdir
fi
# For every video under $srcdir, create "<destdir>/<relative path>.webm"
# (a small low-frame-rate preview) and "<destdir>/<relative path>.txt" (the
# exiftool metadata dump). Outputs that already exist are left alone.
# Globals: srcdir, destdir, batchmode, scriptname (read);
#          processed, failed, skipped (updated).
# IFS= and -r keep leading/trailing blanks and backslashes of file names.
while IFS= read -r -d '' trgt; do
  mkdir -p -- "$(dirname -- "$destdir/$trgt")"
  webm="$destdir/$trgt.webm"
  if [ ! -f "$webm" ]; then
    if [ "$batchmode" -eq 1 ]; then
      echo "$srcdir/$trgt"
    fi
    # stdin comes from /dev/null so ffmpeg cannot eat the file list feeding
    # this loop.
    if ffmpeg -n -hide_banner -loglevel error -stats -vsync vfr -i "$srcdir/$trgt" -c:v libvpx-vp9 -row-mt 1 -deadline good -crf 36 -c:a libopus -b:a 32k -vf "fps=6,scale=420:-1" "$webm" < /dev/null; then
      processed=$((processed+1))
    else
      echo "Deleting partial output" >&2
      failed=$((failed+1))
      # -f: ffmpeg may have failed before creating any output.
      rm -f -- "$webm"
      if [ "$batchmode" -ne 0 ]; then
        echo "Premature exit $scriptname" >&2
        echo "Processed: $processed. Skipped: $skipped. Failed: $failed."
        # Non-zero, so that callers can tell the run was cut short.
        exit 1
      fi
    fi
  else
    skipped=$((skipped+1))
  fi
  if [ ! -f "$destdir/$trgt.txt" ]; then
    exiftool -m "$srcdir/$trgt" > "$destdir/$trgt.txt"
  fi
done < <(cd "$srcdir" && find . -type f -size +1 \( -iname \*.MTS -o -iname \*.AVI -o -iname \*.MOV -o -iname \*.MP4 \) -print0 )
echo "Processed: $processed. Skipped: $skipped. Failed: $failed."
#!/bin/bash
# requires exiftool
# requires dcraw, imagemagick, cjpeg for the -r option
scriptname=$(basename "$0")
# options (defaults); shell-style flags: 0 = on, 1 = off
quiet=1
resample=1
copyexif=1
# counters
processed=0
failed=0
skipped=0
# Print the usage text of the raw preview script to stdout.
# Reads the global $scriptname for the synopsis line.
Help() {
  printf '%s\n' \
    "Recursively extract previews from raw images" \
    "" \
    "Syntax: $scriptname [-s|d|q|h|r|e]" \
    "options:" \
    " -s Source dir (without trailing /)." \
    " -d Destination dir (without trailing /)." \
    " -r Sample preview from RAW" \
    " -e Copy exif data." \
    " -q Quiet mode." \
    " -h Print this Help." \
    ""
}
# Get the options
while getopts "hqres:d:" option; do
  case $option in
    h) # display Help
      Help
      exit;;
    s) # source directory
      srcdir=$OPTARG;;
    d) # destination directory
      destdir=$OPTARG;;
    q) # quiet mode (0 = on)
      quiet=0;;
    r) # resample the preview from the original (0 = on)
      resample=0;;
    e) # copy Exif data (0 = on)
      copyexif=0;;
    \?) # unknown option or missing argument; getopts has printed the details
      echo "Error: Invalid option" >&2
      exit 1;;
  esac
done
# Ask interactively for whichever directory was not given on the command line.
# -r keeps backslashes in the typed path intact.
if [[ ! -v srcdir ]]; then
  echo "Enter source"
  read -r srcdir
fi
if [[ ! -v destdir ]]; then
  echo "Enter destination"
  read -r destdir
fi
# For every raw image under $srcdir, create "<destdir>/<relative path>.JPG",
# either rendered from the raw data (resample on) or taken from the preview
# embedded in the raw file, then optionally copy the metadata across.
# Previews that already exist are skipped.
# Globals: srcdir, destdir, quiet, resample, copyexif (read);
#          processed, failed, skipped (updated).
# IFS= and -r keep leading/trailing blanks and backslashes of file names.
while IFS= read -r -d '' trgt; do
  preview="$destdir/$trgt.JPG"
  if [ "$quiet" -eq 1 ]; then
    echo "$srcdir/$trgt >> $preview"
  fi
  mkdir -p -- "$(dirname -- "$preview")"
  if [ ! -f "$preview" ]; then
    # The status is tracked explicitly: `$?` after an `if` whose condition is
    # false is 0, which used to hide every extraction failure when -e was off.
    status=0
    if [ "$resample" -eq 0 ]; then
      # pipefail (confined to a subshell) makes any stage's failure count.
      ( set -o pipefail
        dcraw -c -h "$srcdir/$trgt" | magick convert -resize 1000x1000 - - | cjpeg -dct fast -quality 80 > "$preview" ) || status=1
    else
      exiftool -m "$srcdir/$trgt" -b -previewimage > "$preview" || status=1
    fi
    # A zero-length preview is a failure as well.
    if [ "$status" -eq 0 ] && [ ! -s "$preview" ]; then
      status=1
    fi
    if [ "$status" -ne 0 ]; then
      # Remove the unusable file so that a later run retries it.
      rm -f -- "$preview"
    elif [ "$copyexif" -eq 0 ]; then
      exiftool -m -tagsfromfile "$srcdir/$trgt" "-all:all>all:all" "$preview" -overwrite_original_in_place || status=1
    fi
    if [ "$status" -ne 0 ]; then
      failed=$((failed+1))
    else
      processed=$((processed+1))
    fi
  else
    skipped=$((skipped+1))
  fi
done < <(cd "$srcdir" && find . -type f -size +1 \( -iname \*.ORF -o -iname \*.ARW -o -iname \*.DNG -o -iname \*.TIF \) -print0 )
echo "Processed: $processed. Failed: $failed. Skipped: $skipped."
#!/bin/bash
# The script relies on bash features (process substitution), hence the shebang.
scriptname=$(basename "$0")
# Example settings (normally supplied through -t, -s, -d and -l):
# tmpdir=/home/rolf/photo/archive/2023
# remotedir=b2ro:Droppit/ArchivePhoto/2023
# dstdir=/home/rolf/photo/previews/2023
# maxtransfer=1G
truncate_counter=0
# Never inherit a remote from the environment; without -s nothing is downloaded.
unset remotedir
# Print the usage text of the remote-previews script to stdout.
# Reads the global $scriptname for the synopsis and example lines.
Help() {
  printf '%s\n' \
    "Manages preview creation from remote archive" \
    "" \
    "Syntax: $scriptname [-s|t|d|l|h]" \
    "Eg: $scriptname -s b2ro:Droppit/ArchivePhoto/2023 -t ../archive/2023 -d ../previews/2023 -l 4G" \
    "options:" \
    " -s Rclone pathspec of remote source" \
    " -t Temporary dir" \
    " -d Preview (target) dir" \
    " -l Transfer limit (eg: 4G)" \
    " -h Print this Help." \
    ""
}
# Overwrite the file named by $1 with a single newline, creating the file if
# it does not exist yet. The result is a 1-byte placeholder, not an empty file.
Truncate() {
  printf '\n' > "$1"
}
# Get the options
while getopts "hs:d:t:l:" option; do
  case $option in
    h) # display Help
      Help
      exit;;
    s) # rclone pathspec of the remote source
      remotedir=$OPTARG;;
    d) # preview (target) directory
      dstdir=$OPTARG;;
    t) # temporary directory holding the downloaded originals
      tmpdir=$OPTARG;;
    l) # transfer limit handed to rclone
      maxtransfer=$OPTARG;;
    \?) # unknown option or missing argument; getopts has printed the details
      echo "Error: Invalid option" >&2
      exit 1;;
  esac
done
# Download a batch of originals (only when a remote was given), build the
# previews, raw digests and video previews from them, and replace each
# processed original with a placeholder.
# Globals: tmpdir, dstdir, remotedir, maxtransfer (read).
: "${tmpdir:?Specify temporary directory}"
: "${dstdir:?Specify destination directory}"
# This script relies on the other scripts skipping empty source files or files
# which already have previews.
if [ -n "$remotedir" ]; then
  : "${maxtransfer:?Transfer limit is required}"
  # The exit status is deliberately not checked here.
  # NOTE(review): presumably rclone reports an error once --max-transfer is
  # reached, which is the normal end of a batch - confirm.
  rclone copy "$remotedir" "$tmpdir" --progress --ignore-existing --max-transfer "$maxtransfer"
fi
echo "Please wait for previews"
./previews.bash -q -s "$tmpdir" -d "$dstdir" -r -e
echo "Please wait for raw digests"
./previews-rawdigest.bash -q -c -s "$tmpdir" -d "$dstdir"
# NOTE(review): originals are truncated whether or not their preview was
# actually produced above.
#
# The loops below run in the foreground on NUL-delimited lists, so each step
# has finished (and printed its count) before the next one starts, and paths
# containing blanks are handled.

# In-camera JPEG: same path as the raw file with the extension replaced by
# .JPG. Truncate creates the placeholder even where no such JPG existed.
count=0
while IFS= read -r -d '' file; do
  Truncate "${file%.*}.JPG"
  count=$((count+1))
done < <(find "$tmpdir" -type f \( -iname "*.ORF" -o -iname "*.ARW" \) -size +1 -print0)
echo "In cam JPEGS truncated: $count"

count=0
while IFS= read -r -d '' file; do
  Truncate "$file"
  count=$((count+1))
done < <(find "$tmpdir" -type f \( -iname "*.ORF" -o -iname "*.ARW" \) -size +1 -print0)
echo "Originals truncated: $count"

echo "Please wait for video previews"
./previews-vid.bash -b -s "$tmpdir" -d "$dstdir"

count=0
while IFS= read -r -d '' file; do
  Truncate "$file"
  count=$((count+1))
done < <(find "$tmpdir" -type f \( -iname "*.MTS" -o -iname "*.AVI" -o -iname "*.MOV" -o -iname "*.MP4" \) -size +1 -print0)
echo "Videos truncated: $count"
#!/usr/bin/env python3
import subprocess
import sys
def main(argv):
    """Run a shell command a fixed number of times.

    argv[1] is the repeat count; argv[2:] are joined with single spaces and
    handed to ``bash -c`` as one command string. The exit status of the
    command itself is not inspected.

    Returns 0 after the last run, or 2 when the count is missing or is not
    an integer.
    """
    if len(argv) < 2:
        print("Usage: {} COUNT COMMAND [ARG...]".format(argv[0] if argv else "repeat"),
              file=sys.stderr)
        return 2
    try:
        count = int(argv[1])
    except ValueError:
        print("COUNT must be an integer, got {!r}".format(argv[1]), file=sys.stderr)
        return 2
    command = " ".join(argv[2:])
    for _ in range(count):
        subprocess.run(["bash", "-c", command])
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
# The local archive can be moved or copied to cloud storage. Example:
# NOTE(review): --min-size 2B presumably keeps the 1-byte placeholder files
# from being uploaded - confirm.
rclone moveto ../archive b2:Droppit/ArchivePhoto/ --min-size 2B --progress --include "*.{arw,xmp,orf}" --ignore-case --bwlimit 85
rclone moveto ../archive b2:Droppit/ArchivePhoto/ --min-size 2B --progress --include "*.{mts,mp4,mov,avi}" --ignore-case --bwlimit 85
# Note: As tempting as it is, --immutable is avoided because rclone can generate partially uploaded files on the remote when an upload is interrupted (despite what the doc says), and those uploads would then need to be resumed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment