Skip to content

Instantly share code, notes, and snippets.

@alanorth
Last active July 21, 2024 02:34
Show Gist options
  • Save alanorth/371696685ebf65ac5e04536870916e82 to your computer and use it in GitHub Desktop.
Save alanorth/371696685ebf65ac5e04536870916e82 to your computer and use it in GitHub Desktop.
Script for preparing media files for long-term archival
#!/usr/bin/env bash
#
# pre-process-media.sh v2021-12-26
#
# Prepare a directory of images and videos for long-term archival by normalizing
# their names, optimizing JPEGs with jpeg-archive, and stripping embedded MP4s
# from Android Motion Photos.
#
# SPDX-License-Identifier: GPL-3.0-only
# Changes:
#
# v2021-12-26: add 'ftypiso6' for Nokia Android 9 MVIMG files, anchor regexes to
# beginning of line to prevent renaming files prematurely.
# Exit on first error
set -o errexit

# The single positional argument is the directory that holds the media files.
if [[ -z "${1:-}" ]]; then
  # Diagnostics belong on stderr so they never end up in captured output.
  echo "No target directory specified." >&2
  exit 1
fi

echo "Preprocessing images and videos in $1"

# Change to the target directory. Every glob below is relative to it; with
# errexit active the script stops here if the directory cannot be entered.
pushd "$1" >/dev/null

echo "Changing permissions to 640..."
# Applies to regular files only, recursively. '\;' is kept on purpose: with
# '+' a single failing chmod would make find exit non-zero and abort the run.
find . -type f -exec chmod 640 {} \;
# Rename IMG_20210217_204834.jpg to 2021-02-17_204834.jpg. The upper-case and
# four-letter extension variants are normalized to ".jpg" at the same time.
echo "Renaming images to ISO 8601 and removing IMG_..."
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.jpg/$1-$2-$3_$4.jpg/' *.jpg
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.JPG/$1-$2-$3_$4.jpg/' *.JPG
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.jpeg/$1-$2-$3_$4.jpg/' *.jpeg
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.JPEG/$1-$2-$3_$4.jpg/' *.JPEG

# Rename IMG_2180.HEIC to 2180.heic (iPhone). The dot is escaped so it only
# matches the literal "." in front of the extension, not any character.
perl-rename 's/^IMG_([0-9]{4})\.HEIC/$1.heic/' *.HEIC

# Rename PXL_20210717_043834784.jpg to 2021-07-17_043834784.jpg (Pixel)
perl-rename 's/^PXL_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.jpg/$1-$2-$3_$4.jpg/' *.jpg
# Remove embedded MP4s from Pixel motion images. Depending on the Android
# version the MP4 header could be ftypmp42, ftypmp4, ftypisom, etc., so each
# one is checked in succession. To make matters worse, some images are called
# MP and appear to contain multiple images in the Android Photos app, but
# don't contain an MP4 at all; those are left untouched.
#
# See: https://stackoverflow.com/questions/53104989/how-to-extract-the-photo-video-component-of-a-mvimg
# See: https://medium.com/android-news/working-with-motion-photos-da0aa49b50c
# See: https://linuxreviews.org/Google_Pixel_%22Motion_Photo%22

#######################################
# Cut a trailing embedded MP4 off a motion photo, in place.
# Arguments: $1 - path of the image file
# Returns:   0 when nothing had to be done, otherwise the status of truncate
#######################################
strip_embedded_mp4() {
  local file=$1
  local header match ofs

  # 'ftypmp4' is a prefix of 'ftypmp42', so the shorter pattern already covers
  # both; the list is kept as-is to document the known variants.
  for header in 'ftypisom' 'ftypmp4' 'ftypmp42' 'ftypiso6'; do
    # grep prints one "offset:match" line per occurrence. Only the first
    # occurrence is wanted; several lines would otherwise end up in the
    # arithmetic below and break it.
    match=$(LC_ALL=C grep -F --byte-offset --only-matching --text -- "$header" "$file" | head -n 1)
    [[ -n $match ]] || continue

    ofs=${match%%:*}
    # The header tag sits 4 bytes into the MP4 data, hence the "- 4". An
    # offset below 4 would give a negative size, which truncate interprets as
    # "shrink by that many bytes", so such a match is ignored.
    if [[ $ofs =~ ^[0-9]+$ ]] && (( ofs >= 4 )); then
      truncate -s "$((ofs - 4))" -- "$file"
    fi
    # First matching header wins; go to the next image.
    return
  done

  return 0
}

# Stripping is best effort: a failure on one image must not abort the whole
# run, so errexit is disabled while the loop is running.
set +o errexit
for file in PXL_*.MP.jpg MVIMG_*.jpg; do
  # Don't crash when there are no files matching the glob
  [[ -f "$file" ]] || continue
  strip_embedded_mp4 "$file"
done
# Re-set exit on first error
set -o errexit
# Second pass over the JPEGs: drop the camera prefix and the capture-mode
# suffix and switch the date to ISO 8601. One expression per naming scheme,
# applied in this order:
#   PXL_20210717_043834784.MP.jpg       -> 2021-07-17_043834784.jpg (Pixel Motion Images)
#   MVIMG_20190618_124507.jpg           -> 2019-06-18_124507.jpg    (Android Motion Images)
#   PXL_20210910_193159741.NIGHT.jpg    -> 2021-09-10_193159741.jpg (Pixel Night mode)
#   PXL_20211118_162823829.PORTRAIT.jpg -> 2021-11-18_162823829.jpg (Pixel Portrait mode)
special_jpg_exprs=(
  's/^PXL_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.MP\.jpg/$1-$2-$3_$4.jpg/'
  's/^MVIMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.jpg/$1-$2-$3_$4.jpg/'
  's/^PXL_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.NIGHT\.jpg/$1-$2-$3_$4.jpg/'
  's/^PXL_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.PORTRAIT\.jpg/$1-$2-$3_$4.jpg/'
)
for expr in "${special_jpg_exprs[@]}"; do
  # The glob is expanded anew on every pass, so each expression sees the
  # names produced by the previous one.
  perl-rename "$expr" *.jpg
done
#######################################
# Tell whether a HEIC file name already carries the date prefix added below.
# Arguments: $1 - file name, e.g. 2021-02-17_2180.heic
# Returns:   0 if the name looks like YYYY-MM-DD_NNNN.heic, 1 otherwise
#######################################
heic_already_dated() {
  # The extension is lower case because the files reaching this point come
  # from the '*.heic' glob.
  [[ $1 =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{4}\.heic$ ]]
}

for file in *.heic; do
  # Don't crash when there are no files matching the glob
  [[ -f "$file" ]] || continue
  # We are going to rename the HEIC files according to their embedded dates,
  # but exiftool syntax is hard so I will just check if these files were
  # already renamed and skip them. These files come from iPhones.
  if ! heic_already_dated "$file"; then
    # Rename file based on DateTimeOriginal EXIF tag and the original file
    # name (I removed the "IMG_" part with perl-rename first). The single
    # quotes are intentional: exiftool, not the shell, expands the ${...} tags.
    exiftool '-filename<${DateTimeOriginal}_${FileName}' -d %Y-%m-%d "$file"
  fi
done
# Rename VID_20210205_112539.mp4 to 2021-02-05_112539.mp4. As with the image
# patterns, the expressions are anchored to the beginning of the name so a
# "VID_"/"PXL_" sequence in the middle of some other file name is left alone.
echo "Renaming videos to ISO 8601 and removing VID_..."
perl-rename 's/^VID_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.mp4/$1-$2-$3_$4.mp4/' *.mp4
perl-rename 's/^VID_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.MP4/$1-$2-$3_$4.mp4/' *.MP4
# Rename PXL_20210714_145336054.mp4 to 2021-07-14_145336054.mp4
perl-rename 's/^PXL_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.mp4/$1-$2-$3_$4.mp4/' *.mp4
# Optimize JPEGs with jpeg-archive. Each file is recompressed in place (input
# and output path are the same). Names are handed to parallel NUL-delimited so
# that unusual characters such as newlines in a file name cannot split one
# path into several jobs. chrt -b 0 runs the jobs under the batch scheduling
# policy.
find . -name '*.jpg' -print0 \
  | chrt -b 0 parallel --no-notice --null "jpeg-recompress -q high {} {}"

# Change back to our starting directory
popd >/dev/null

exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment