|
#!/bin/bash |
|
# |
|
# Cleanup shopware media/image/ directory for misplaced files. Either obvious |
|
# violators (more than 3 levels deep), the image base directory or everything.
|
# |
|
# Under certain circumstances[1] from 5.1 (SW-12620) until 5.2.8 the media |
|
# fallback would move ("migrate") directories and files to unreachable paths. |
|
# This was fixed in SW-16467 (https://github.com/shopware/shopware/commit/737ebc79) |
|
# and a config setting since 5.3 (liveMigration -> false) |
|
# |
|
# [1] the unfortunate case was a mail scanner/crawler requesting all the parent |
|
# directories of imagelinks contained in a just sent out newsletter, causing |
|
# the immediate disappearance of those images. Look out for these log entries:
|
# core.ERROR: Legacy media url detected. |
|
# {"requestedUrl":"/media/image/19/85/90", |
|
# "redirectedTo":"https://example.com/media/image/e2/13/9b/90"} |
|
# |
|
# As thumbnail detection "in hindsight" is not accurate, use at your own risk. |
|
# Only the database can really tell source image and thumbnail apart. |
|
|
|
# Usage banner: shown when the script is started without any arguments.
# The script deliberately keeps running afterwards (report mode with the
# default policy). When the second argument is --real-run, announce that
# changes are going to be made.
if [[ -z "$*" ]]; then
  cat <<'EOF'
Choose search policy: exceeding (default), basedir, all (risky)
Usage: ~/bin/media-cleanup.sh {policy} (--real-run) | tee ~/media-cleanup.log

Default is report mode only, use --real-run to execute changes.

EOF
elif [[ "$2" == '--real-run' ]]; then
  echo "real-run. cleaning up.."
fi
|
|
|
# Safety check: all find/mv/rm calls in this script use paths relative to the
# current directory, so refuse to run anywhere but inside a media/image
# directory. The match is on whole path components ("/media/image" at the very
# end of $PWD), and the refusal is reported with a non-zero exit status.
if [[ "$PWD" != */media/image ]]; then
  echo "Script not run from media/image/, please navigate to that directory. Exiting.."
  exit 1
fi
|
|
|
# thumbnail detection
#
# $resolutions is the space separated list of thumbnail sizes ("WxH") that the
# shop may have generated. Uncomment and adjust the next line to skip the
# database lookup.

#resolutions="57x57 140x140 200x200 600x600 800x800 1280x1280 1920x1920"

#######################################
# Look up all configured thumbnail sizes in the shop database.
# Globals:
#   username, dbname, password (written and exported, read from ../../config.php)
#   resolutions (written; left empty when the credentials cannot be read)
# Returns:
#   1 when no credentials could be read, otherwise the pipeline status.
#######################################
fetch_resolutions() {
  local key value

  # read db credentials
  for key in username dbname password; do
    # The array keys are passed to PHP as quoted strings: bare words would be
    # undefined constants, which PHP 8 turns into a fatal Error.
    value=$(php --run '$loader = require("../../autoload.php"); $config = include("../../config.php"); print_r($config["db"]["'"$key"'"]);' 2>/dev/null) || value=''
    export "$key=$value"
  done

  # Without credentials the query cannot succeed; leave the list empty so the
  # failure is reported instead of letting mysql fall back to other defaults.
  if [[ -z "$username" || -z "$dbname" ]]; then
    resolutions=''
    return 1
  fi

  # query for all possible thumbnail dimensions, add non-explicit thumbnail sizes
  # - concat_ws() keeps a separator between the stored sizes and the implicit
  #   ones, otherwise e.g. "600x600" + "57x57" is read back as "600x6005".
  # - --password=VALUE never prompts, even when the value is empty.
  resolutions=$(mysql --user="$username" --password="$password" "$dbname" -B -N -e \
    "select concat_ws(' ', thumbnail_size, '57x57 140x140') from s_media_album_settings group by thumbnail_size;" \
    | grep -o -E "[0-9]{2,4}x[0-9]{2,4}" | sort -n | uniq | tr "\r\n" " ")
}

if [ -z "$resolutions" ]; then
  fetch_resolutions
fi
|
|
|
# Sanity check on the size list: it has to mention "140x140", otherwise the
# lookup is treated as failed and the script stops with exit status 1.
case "$resolutions" in
  *140x140*)
    ;;
  *)
    echo ''
    echo 'Database fetch failed, define path to config_$env.php or set $resolutions at lines 46 or 42 manually'
    exit 1
    ;;
esac
|
|
|
#######################################
# Build the alternations used in the "grep -E" calls of isthumbnail: one
# "_<WxH>\.<ext>" alternative per size in $resolutions, joined with "|"
# (the trailing pipe is trimmed).
# Globals:
#   resolutions (read)
#   thumbjpg, thumbpng, thumbgif (written)
#######################################
build_thumb_patterns() {
  # Split on plain whitespace regardless of what IFS the caller has set.
  local IFS=$' \t\n'
  local entry

  # Start from scratch so that values inherited from the environment or from
  # an earlier call cannot leak into the patterns.
  thumbjpg=''
  thumbpng=''
  thumbgif=''

  # Intentionally unquoted: $resolutions is a whitespace separated list.
  for entry in $resolutions; do
    thumbjpg+="_${entry}\.jpg|"
    thumbpng+="_${entry}\.png|"
    thumbgif+="_${entry}\.gif|"
  done

  thumbjpg=${thumbjpg%"|"}
  thumbpng=${thumbpng%"|"}
  thumbgif=${thumbgif%"|"}
}

build_thumb_patterns
|
|
|
#######################################
# generate md5path, honoring blacklist for adblockers ad -> g0
# engine/Shopware/Bundle/MediaBundle/Strategy/Md5Strategy.php#L36
#
# Reads the media path from stdin and prints the directory derived from the
# first six hex digits of its md5 sum as "xx/yy/zz/" (with trailing slash);
# every "ad" pair is replaced by "g0".
# Returns:
#   non-zero (and prints nothing) when md5sum fails.
#######################################
md5strategy() {
  local digest dir
  digest=$(md5sum) || return
  dir="${digest:0:2}/${digest:2:2}/${digest:4:2}/"
  # Plain parameter expansion instead of cut and sed: this runs once per file.
  printf '%s\n' "${dir//ad\//g0/}"
}
|
#######################################
# Guess whether a file is a generated thumbnail rather than a source image.
#
# file command introduces I/O, less confident on png files. Other
# media files than jpg/png not tested
#
# A path counts as a thumbnail when its name ends in "_<W>x<H>[@2x].<ext>" and
#   - it carries the "@2x" marker with a three character extension, or
#   - its size suffix is one of the known sizes and file(1) reports
#     "CREATOR: gd-jpeg" (jpg), "non-interlaced" (png) or "version 87a" (gif).
# Globals:
#   fullpath (read, used when no argument is given)
#   thumbjpg, thumbpng, thumbgif (read, "grep -E" alternations)
# Arguments:
#   $1 - path to inspect (optional, defaults to $fullpath)
# Returns:
#   0 if the path is considered a thumbnail, 1 otherwise.
#######################################
isthumbnail() {
  local path=${1:-$fullpath}
  # The dot in front of the extension is matched literally.
  local sized_re='_[0-9]{2,4}x[0-9]{2,4}(@2x)?\.[[:alnum:]_]{3,4}$'
  local retina_re='@2x\.[[:alnum:]_]{3}$'

  # No size suffix at all: cannot be a thumbnail.
  [[ "$path" =~ $sized_re ]] || return 1

  # High-dpi variants are accepted on their name alone.
  if [[ "$path" =~ $retina_re ]]; then
    return 0
  fi

  # The path is quoted for file(1) so that names containing spaces or glob
  # characters are inspected as a single file.
  if grep -q -E -e "$thumbjpg" <<<"$path" \
    && file -- "$path" | grep -q 'CREATOR: gd-jpeg'; then
    return 0
  fi

  if grep -q -E -e "$thumbpng" <<<"$path" \
    && file -- "$path" | grep -q "non-interlaced"; then
    return 0
  fi

  if grep -q -E -e "$thumbgif" <<<"$path" \
    && file -- "$path" | grep -q "version 87a"; then
    return 0
  fi

  return 1
}
|
|
|
# do the actual work
#
# For every file selected by the search policy ($1) compute the directory the
# md5 strategy assigns to it and compare that with the directory it was found
# in. Mismatches are reported; with --real-run as $2 the file is moved to its
# proper directory, or deleted when a file of that name already exists there.
#
# find hands over "<directory>\0<filename>\0" records, so names containing
# "|" or newlines are processed correctly and IFS is left untouched for the
# rest of the script.
while IFS= read -r -d '' origpath && IFS= read -r -d '' filename; do
  # incomplete record, nothing to act on
  if [[ -z "$origpath" || -z "$filename" ]]; then
    continue
  fi
  fullpath="$origpath/$filename"

  # is it a thumbnail or a sourceimage, assign path
  if isthumbnail "$fullpath"; then
    mediapath="media/image/thumbnail/$filename"
    mediatype="thumb"
  else
    mediapath="media/image/$filename"
    mediatype="image"
  fi

  md5path=$(printf '%s' "$mediapath" | md5strategy)
  # Never move or delete anything when the target could not be computed.
  if [[ -z "$md5path" ]]; then
    echo "$mediatype: $fullpath skipped, could not compute md5 path" >&2
    continue
  fi

  # does the found path equal the should-be path by name (and level)
  if [[ "$origpath/" == "./$md5path" ]]; then
    continue
  fi

  printf '%s' "$mediatype: $fullpath mismatch, should be at $md5path"
  if [[ ! -f "$md5path$filename" ]]; then
    # moving the file, verbose and non-overwriting/update-only
    if [[ "$2" == '--real-run' ]]; then
      printf '%s' " move: "
      if mkdir -p -- "$md5path" && mv -v -u -- "$fullpath" "$md5path"; then
        # mv -u may have skipped the move; a leftover at the origin is only
        # removed once the file is verified to exist at its target.
        if [[ -f "$md5path$filename" ]]; then
          rm -f -- "$fullpath"
        fi
      else
        # Keep the only copy when the target directory or the move failed.
        echo "move failed, keeping $fullpath" >&2
      fi
    else
      echo
    fi
  else
    printf '%s' ", but exists at proper target already. real-run for delete. "
    if [[ "$2" == '--real-run' ]]; then
      rm -v -- "$fullpath"
    else
      echo
    fi
  fi

# define scope of cleanup, for the parentheses in the options see stackoverflow.com/a/34503049
done < <(
  case "$1" in
    (exceeding|"")
      # find obvious violators exceeding the 3-folder deep md5 structure
      # this is the default
      find . -mindepth 5 -type f -printf '%h\0%f\0'
      ;;
    (basedir)
      # check only media/image/ and media/image/thumbnail/
      find . -maxdepth 2 -type f -printf '%h\0%f\0'
      ;;
    (all)
      # find everything
      find . -type f -printf '%h\0%f\0'
      ;;
    (*)
      echo "Unknown search policy '$1', nothing scanned. Choose exceeding, basedir or all." >&2
      ;;
  esac
)
|
|
|
#######################################
# This deletes hollow directory trees below the current directory, or only
# counts the empty directories in report mode. The split md5 hash directory
# structure can add up to 64GB of directory entries (256^3 leaf directories at
# 4096 bytes each).
#
# find's -delete implies -depth: children are handled before their parent, so
# a chain of nested empty directories is removed in a single pass.
# Arguments:
#   $1 - "--real-run" to delete; anything else only reports the count
# Outputs:
#   "deleting emptydirs" or "count of emptydirs: <n>" on stdout
#######################################
prune_empty_dirs() {
  local emptydirs
  if [[ "$1" == '--real-run' ]]; then
    echo "deleting emptydirs"
    find . -mindepth 1 -type d -empty -delete
  else
    # One character per hit, so odd directory names cannot skew the count.
    emptydirs=$(find . -mindepth 1 -type d -empty -printf '.')
    echo "count of emptydirs: ${#emptydirs}"
  fi
}

prune_empty_dirs "$2"