Last active
July 21, 2024 02:34
-
-
Save alanorth/371696685ebf65ac5e04536870916e82 to your computer and use it in GitHub Desktop.
Script for preparing media files for long-term archival
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# | |
# pre-process-media.sh v2021-12-26 | |
# | |
# Prepare a directory of images and videos for long-term archival by normalizing | |
# their names, optimizing JPEGs with jpeg-archive, and stripping embedded MP4s | |
# from Android Motion Photos. | |
# | |
# SPDX-License-Identifier: GPL-3.0-only | |
# Changes: | |
# | |
# v2021-12-26: add 'ftypiso6' for Nokia Android 9 MVIMG files, anchor regexes to | |
# beginning of line to prevent renaming files prematurely. | |
# Exit on first error
set -o errexit

# The target directory is the only (required) argument. Diagnostics go to
# stderr so they are not mixed into the progress output on stdout.
if [[ -z "${1:-}" ]]; then
  echo "No target directory specified." >&2
  exit 1
fi

if [[ ! -d "$1" ]]; then
  echo "Target is not a directory: $1" >&2
  exit 1
fi

echo "Preprocessing images and videos in $1"

# Change to the target directory; the popd at the end of the script undoes this
pushd "$1" >/dev/null

echo "Changing permissions to 640..."
# '+' batches many files into each chmod invocation instead of forking once
# per file, and makes find report a chmod failure through its exit status.
find . -type f -exec chmod 640 {} +
# Rename IMG_20210217_204834.jpg to 2021-02-17_204834.jpg. Every spelling of
# the JPEG extension is normalized to a lower-case ".jpg". The regexes are
# anchored with ^ so that names which merely contain "IMG_" (such as
# MVIMG_...) are left alone at this stage.
echo "Renaming images to ISO 8601 and removing IMG_..."
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.jpg/$1-$2-$3_$4.jpg/' *.jpg
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.JPG/$1-$2-$3_$4.jpg/' *.JPG
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.jpeg/$1-$2-$3_$4.jpg/' *.jpeg
perl-rename 's/^IMG_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.JPEG/$1-$2-$3_$4.jpg/' *.JPEG

# Rename IMG_2180.HEIC to 2180.heic (iPhone). The extension is lower-cased
# here, and the dot is escaped so it only matches a literal "." rather than
# any character.
perl-rename 's/^IMG_([0-9]{4})\.HEIC/$1.heic/' *.HEIC

# Rename PXL_20210717_043834784.jpg to 2021-07-17_043834784.jpg (Pixel)
perl-rename 's/^PXL_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.jpg/$1-$2-$3_$4.jpg/' *.jpg
# Remove embedded MP4s from Pixel motion images. Depending on the Android
# version the MP4 header could be ftypmp42, ftypmp4, ftypisom, etc. We have to
# check each one in succession. To make matters worse, it seems some images
# are called MP and appear to contain multiple images in the Android Photos
# app, but don't contain an MP4!
#
# See: https://stackoverflow.com/questions/53104989/how-to-extract-the-photo-video-component-of-a-mvimg
# See: https://medium.com/android-news/working-with-motion-photos-da0aa49b50c
# See: https://linuxreviews.org/Google_Pixel_%22Motion_Photo%22

#######################################
# Truncate a motion photo just before its embedded MP4, if it has one.
# Arguments:
#   $1 - path to the JPEG file to inspect (modified in place)
# Returns:
#   truncate's exit status when an MP4 header was found, 1 when none of the
#   known headers is present (the file is then left untouched).
#######################################
strip_embedded_mp4() {
  local file=$1
  local header ofs

  for header in 'ftypisom' 'ftypmp4' 'ftypmp42' 'ftypiso6'; do
    # grep prints "<byte offset>:<match>" for every occurrence and exits
    # non-zero when there is none. Only the first occurrence is kept so that
    # a repeated header cannot produce a multi-line, non-numeric offset.
    ofs=$(LC_ALL=C grep -F --byte-offset --only-matching --text -- "$header" "$file" | head -n 1) || true
    ofs=${ofs%%:*}

    # The header is preceded by 4 bytes that belong to the MP4 as well, so
    # cut 4 bytes before the match. Refuse offsets that would leave an empty
    # (or negative-sized) file.
    if [[ $ofs =~ ^[0-9]+$ ]] && ((ofs > 4)); then
      truncate -s "$((ofs - 4))" -- "$file"
      return
    fi
  done

  return 1
}

for file in PXL_*.MP.jpg MVIMG_*.jpg; do
  # Don't crash when there are no files matching the glob
  [[ -f "$file" ]] || continue

  # Best effort: an image without an embedded MP4 is not an error, so the
  # helper's non-zero status must not trip errexit.
  strip_embedded_mp4 "$file" || true
done
# Normalize the remaining camera-specific JPEG names to ISO 8601. Each entry
# below is "<prefix>:<regex for the extra suffix before .jpg>" and is handled
# by its own perl-rename pass, in this order:
#
#   PXL_20210717_043834784.MP.jpg       -> 2021-07-17_043834784.jpg (Pixel Motion Images)
#   MVIMG_20190618_124507.jpg           -> 2019-06-18_124507.jpg    (Android Motion Images)
#   PXL_20210910_193159741.NIGHT.jpg    -> 2021-09-10_193159741.jpg (Pixel Night mode)
#   PXL_20211118_162823829.PORTRAIT.jpg -> 2021-11-18_162823829.jpg (Pixel Portrait mode)
stamp_re='([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)'
iso_name='$1-$2-$3_$4.jpg'
for variant in 'PXL_:\.MP' 'MVIMG_:' 'PXL_:\.NIGHT' 'PXL_:\.PORTRAIT'; do
  name_prefix=${variant%%:*}
  name_suffix=${variant#*:}
  # The glob is re-evaluated on every pass, so files renamed by an earlier
  # pass are seen under their new names.
  perl-rename "s/^${name_prefix}${stamp_re}${name_suffix}\\.jpg/${iso_name}/" *.jpg
done
#######################################
# Tell whether a HEIC file name already has the YYYY-MM-DD_NNNN.heic form.
# The extension is matched in lower case because the loop below only sees
# *.heic files (the earlier IMG_ rename lower-cases the extension).
# Arguments:
#   $1 - file name to check
# Returns:
#   0 if the name is already in the dated form, 1 otherwise
#######################################
heic_already_renamed() {
  [[ $1 =~ ^[0-9]{4}-[0-9]{2}-[0-9]{2}_[0-9]{4}\.heic$ ]]
}

for file in *.heic; do
  # Don't crash when there are no files matching the glob
  [[ -f "$file" ]] || continue

  # We are going to rename the HEIC files according to their embedded dates,
  # but exiftool syntax is hard so I will just check if these files were
  # already renamed and skip them. These files come from iPhones.
  if ! heic_already_renamed "$file"; then
    # Rename file based on DateTimeOriginal EXIF tag and the original file
    # name (I removed the "IMG_" part with perl-rename first). The single
    # quotes are intentional: ${...} is expanded by exiftool, not the shell.
    exiftool '-filename<${DateTimeOriginal}_${FileName}' -d %Y-%m-%d "$file"
  fi
done
# Rename VID_20210205_112539.mp4 to 2021-02-05_112539.mp4. As with the image
# renames, the regexes are anchored with ^ so only names that start with the
# camera prefix are touched.
echo "Renaming videos to ISO 8601 and removing VID_..."
perl-rename 's/^VID_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.mp4/$1-$2-$3_$4.mp4/' *.mp4
perl-rename 's/^VID_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.MP4/$1-$2-$3_$4.mp4/' *.MP4

# Rename PXL_20210714_145336054.mp4 to 2021-07-14_145336054.mp4
perl-rename 's/^PXL_([0-9]{4})([0-9]{2})([0-9]{2})_([0-9]+)\.mp4/$1-$2-$3_$4.mp4/' *.mp4
# Optimize JPEGs with jpeg-archive. Each file is recompressed in place (same
# path for input and output). File names are passed NUL-delimited so that
# names containing newlines reach parallel intact, and only regular files
# are considered. chrt -b 0 runs the whole batch under the SCHED_BATCH
# scheduling policy.
find . -type f -name '*.jpg' -print0 \
  | chrt -b 0 parallel --no-notice -0 "jpeg-recompress -q high {} {}"

# Change back to our starting directory
popd >/dev/null

exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment