Skip to content

Instantly share code, notes, and snippets.

@delan
Last active August 25, 2022 09:15
Show Gist options
  • Save delan/f8e395a9f8c759868124936353880a05 to your computer and use it in GitHub Desktop.
Save delan/f8e395a9f8c759868124936353880a05 to your computer and use it in GitHub Desktop.
Tdarr size explorer script for use with QDirStat (or similar)
#!/bin/sh
# usage: $0 <path/to/shows>
#
# tdarr-dirstat uses hard links to allow QDirStat (or a similar tool) to take video codecs and
# resolution classes (SD, HD, FHD, UHD) into account when colouring tree maps.
#
# We create a .dirstat directory under the given path, containing the same directory structure as
# the original tree, then create hard links to the original videos of the form “foo.mkv.FHD+h264”.
# If .dirstat already exists, we update it for any new paths or changed inode numbers.
#
# You can then open path/to/shows/.dirstat in QDirStat, after configuring colours for patterns like
# “*.*+h264” or “*.FHD+*” under MIME Categories.
# Abort on the first failing command and on any use of an unset variable.
set -o errexit -o nounset
# Space-separated file extensions (without the dot) that are left out of the scan.
ignored_extensions="jpg JPG nfo png srt sub txt jpeg gif ass"
# Space-separated codec names that never count as a file's video stream.
ignored_codec_names="mjpeg png ansi"
# compute_definition <width> <height> <par>
#
# Prints the resolution class of a video (SD, HD, FHD or UHD) on stdout.
#   width   stored width in pixels
#   height  stored height in pixels (accepted but unused: the class depends on width alone)
#   par     pixel aspect ratio written as "H:V"
# The stored width is scaled by par to get the displayed width before it is classified.
# A par that is not two positive integers (e.g. "0:1", "1:0", "N/A" or empty) cannot scale
# anything, so it is treated as 1:1 instead of zeroing the width or dividing by zero.
# The body is a subshell, so the working variables never leak into the caller.
compute_definition() (
  width=$1
  par=${3-}
  par_h=1
  par_v=1
  case "$par" in
    (*[!0-9:]* | *:*:* | :* | *:)
      # malformed ratio: keep 1:1
      ;;
    (*:*)
      if [ "${par%:*}" -gt 0 ] && [ "${par#*:}" -gt 0 ]; then
        par_h=${par%:*}
        par_v=${par#*:}
      fi
      ;;
  esac
  width=$((width * par_h / par_v))
  if [ "$width" -gt 1920 ]; then
    printf '%s\n' UHD
  elif [ "$width" -gt 1280 ]; then
    printf '%s\n' FHD
  # the HD threshold sits halfway between 1024 and 1280
  elif [ "$width" -gt $(((1024 + 1280) / 2)) ]; then
    printf '%s\n' HD
  else
    printf '%s\n' SD
  fi
)
# tests for compute_definition (run on every start)
# expect_definition <width> <height> <par> <expected>
# Compares the printed class with <expected>; on a mismatch (or if compute_definition itself
# fails) it reports the offending case on stderr and exits 1 instead of aborting silently.
expect_definition() {
  _definition_got=$(compute_definition "$1" "$2" "$3") || _definition_got='<error>'
  if [ "$_definition_got" != "$4" ]; then
    printf 'compute_definition self-test failed: %sx%s par %s: expected %s, got %s\n' \
      "$1" "$2" "$3" "$4" "$_definition_got" >&2
    exit 1
  fi
}
expect_definition 720 480 8:9 SD       # NTSC 4:3
expect_definition 720 480 32:27 SD     # NTSC 16:9
expect_definition 720 576 16:15 SD     # PAL 4:3
expect_definition 720 576 64:45 SD     # PAL 16:9
expect_definition 960 720 4:3 HD       # 720p 16:9 anamorphic
expect_definition 1280 540 1:1 HD      # 720p 2.4:1
expect_definition 1280 720 1:1 HD      # 720p 16:9
expect_definition 1440 1080 4:3 FHD    # 1080i 16:9 anamorphic
expect_definition 1920 800 1:1 FHD     # 1080p 2.4:1
expect_definition 1920 1080 1:1 FHD    # 1080p 16:9
expect_definition 3840 1600 1:1 UHD    # 4K 2.4:1
expect_definition 3840 2160 1:1 UHD    # 4K 16:9
expect_definition 1916 1080 1:1 FHD    # K-ON!! Live Event ~Come with Me!!~ (2011)
expect_definition 1880 1080 1:1 FHD    # Doctor Who (2005) 1x01
# canonicalise root path
# Refuse to start without a usable root directory: with "set -u" in effect a missing argument
# would otherwise abort on the bare "$1" without telling the user what was expected.
if [ "$#" -lt 1 ] || ! [ -d "$1" ]; then
  printf 'usage: %s <path/to/shows>\n' "$0" >&2
  exit 2
fi
# r = canonical root path; t = the .dirstat mirror directory underneath it
r=$(readlink -f -- "$1"); shift
t="$r/.dirstat"
# show which file extensions and codec names are currently being skipped
printf '>>> ignored file extensions: %s\n>>> ignored codec names: %s\n' \
  "$ignored_extensions" "$ignored_codec_names"
# print file extensions found in $r
# The extension is taken from the file name only: the directory part is stripped first and
# names without a dot are dropped, so dotted directory names and extension-less files are not
# reported as bogus "extensions". Each distinct extension is printed once, marked "(ignored)"
# when it appears in $ignored_extensions.
printf '>>> found file extensions (edit script to ignore/unignore):\n'
find "$r" \! -path '*/.dirstat/*' -type f -print \
  | sed -e 's|.*/||' -e '/[.]/!d' -e 's/.*[.]//' \
  | sort -u \
  | while IFS= read -r ext; do
      case " $ignored_extensions " in
        (*" $ext "*) printf ' %s (ignored)\n' "$ext" ;;
        (*) printf ' %s\n' "$ext" ;;
      esac
    done
# wait for a line of input so the user can review the ignored extensions/codecs first
# (the reply itself is never examined)
printf '%s' '>>> ok? '
read -r dummy
# create empty directory hierarchy
# Mirror every directory below $r (except .dirstat itself) under $t. Each target path is built
# with parameter expansion inside a small sh helper rather than by splicing $t into a sed
# script, so characters such as "&", "|" or "\" in the root path are copied literally.
# "set -e" inside the helper makes a failed mkdir fail the helper.
cd -- "$r"
find . \! -path '*/.dirstat' \! -path '*/.dirstat/*' -type d \
  -exec sh -c 'set -e; target=$1; shift; for d; do mkdir -p -- "$target${d#.}"; done' sh "$t" {} +
# turn the ignored extensions into find predicates held in "$@":
# each extension “ext” contributes “\! -name '*.ext'”
# ($ignored_extensions is deliberately unquoted so that it splits into one word per extension)
set --
for ext in $ignored_extensions; do
  set -- "$@" \! -name "*.$ext"
done
# for each old hard link
# Link names have the form “<original path>.<definition>+<codec>”. A link is stale when its
# original is gone or its codec is now ignored. Both reasons are collected first and the link
# is removed exactly once, so a link that is stale for both reasons no longer triggers a second,
# failing rm that would abort the run under set -e.
cd -- "$t"
find . -type f -print | while IFS= read -r link; do
  stale=
  # if original no longer exists, delete
  if ! [ -f "$r/${link%.*}" ]; then
    stale=1
  fi
  # if codec is now ignored, delete
  case " $ignored_codec_names " in
    (*" ${link##*+} "*) stale=1 ;;
  esac
  if [ -n "$stale" ]; then
    rm -- "$link"
  fi
done
# for each original video file
# ("$@" holds the find predicates that exclude the ignored extensions)
cd -- "$t"
find "$r" \! -path '*/.dirstat' \! -path '*/.dirstat/*' -type f "$@" -print | while IFS= read -r i; do
  # make $i relative to $r (quoted so that $r is removed literally, not matched as a pattern)
  p=.${i#"$r"}
  # look for existing hard links of the form “$p.<definition>+<codec>”
  link_path=
  for candidate in "$p".*; do
    # skip the unexpanded pattern (no match) and anything that is not a regular file
    [ -f "$candidate" ] || continue
    # skip links that belong to a longer-named sibling,
    # e.g. “foo.mkv.old.SD+h264” while handling “foo.mkv”
    [ "${candidate%.*}" = "$p" ] || continue
    # handle same or different inode
    if [ "$candidate" -ef "$i" ]; then
      link_path=$candidate
    else
      rm -- "$candidate"
    fi
  done
  if [ -n "$link_path" ]; then
    printf '=== %s = %s\n' "${link_path##*.}" "$p"
    continue
  fi
  # get metadata of all video streams
  # (stdin is redirected so ffprobe can never consume the file list this loop is reading)
  metadata=$(ffprobe "$i" -select_streams v -show_streams -hide_banner -loglevel error </dev/null)
  # get index of first non-ignored video stream
  # This is counted in the current shell: a “| while read” loop would increment v in a subshell
  # and the result would be lost. $codec_names is deliberately unquoted: one word per stream.
  codec_names=$(printf '%s\n' "$metadata" | sed -n 's/^codec_name=//p')
  v=0
  for name in $codec_names; do
    case " $ignored_codec_names " in
      (*" $name "*) v=$((v+1)) ;;
      (*) break ;;
    esac
  done
  # pick the values of that stream (line v+1 of each field's occurrences)
  n=$((v+1))
  codec=$(printf '%s\n' "$metadata" | sed -n 's/^codec_name=//p' | sed -n "${n}p")
  width=$(printf '%s\n' "$metadata" | sed -n 's/^width=//p' | sed -n "${n}p")
  height=$(printf '%s\n' "$metadata" | sed -n 's/^height=//p' | sed -n "${n}p")
  # https://ask.metafilter.com/325079/How-to-correct-for-incorrect-SAR-PAR-in-FFMPEG
  # “Oh, and just to pile confusion on confusion, it appears that ffmpeg uses "SAR" to mean sample aspect ratio, not
  # stored aspect ratio. In other words, what ffmpeg refers to as SAR is actually the PAR.”
  # FIXME container and codec might have different aspect ratios?!
  # e.g. 720x480 [SAR 8:9 DAR 4:3], SAR 480:527 DAR 720:527
  # (It’s Always Sunny In Philadelphia 1x01)
  par=$(printf '%s\n' "$metadata" | sed -n 's/^sample_aspect_ratio=//p' | sed -n "${n}p")
  # sample_aspect_ratio=N/A (Archer (2009) 6x09)
  case "$par" in
    # a zero term would zero the width or divide by zero, so fall back to square pixels
    (0:* | *:0) par=1:1 ;;
    (*[0-9]:[0-9]*) ;;
    (*) par=1:1 ;;
  esac
  # no usable stream (e.g. every video stream is ignored): nothing to link
  if [ -z "$codec" ] || [ -z "$width" ] || [ -z "$height" ]; then
    continue
  fi
  case " $ignored_codec_names " in
    (*" $codec "*) continue ;;
  esac
  # print definition + codec + path
  definition=$(compute_definition "$width" "$height" "$par")
  printf '>>> %s+%s = %s\n' "$definition" "$codec" "$p"
  # create hard link with added extension for $codec
  # (qdirstat doesn’t follow symlinks)
  ln -- "$i" "$p.$definition+$codec"
done
# print path for qdirstat: the current working directory at this point in the script
dirstat_dir=$(pwd)
printf '>>> %s\n' "$dirstat_dir"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment