Utilities for finding large or unwanted files/dirs.
#! /bin/bash
# Find big dirs, minimally deep.
# Running in near-silent mode in case want to pipe output.
#
# Usage: bigdirs [DIR [MIN_KIB]]
#   DIR      directory to scan (default: current directory)
#   MIN_KIB  smallest size to report, in du's default units
#            (default: 40000, i.e. roughly 40 MiB)
#
# Matching paths are printed on stdout, biggest first and relative to DIR,
# and are also saved to bigdirs.log in the directory the script was started
# from.  The progress message goes to stderr so stdout stays pipeable.
#
# Author: Micah Elliott <mde MicahElliott com>

# find_big_dirs [DIR [MIN_KIB]]
# The body is a subshell so the cd below never leaks into the caller.
# Returns non-zero if MIN_KIB is not a number or DIR cannot be entered.
find_big_dirs() (
  dir=${1:-$PWD}
  min_kib=${2:-40000}
  #dt=$(date "+%Y%m%d")
  #log="bigdirs.$dt.log"
  # Resolved before the cd so the log lands where the user started.
  log="$PWD/bigdirs.log"

  case $min_kib in
    '' | *[!0-9]*)
      printf 'bigdirs: MIN_KIB must be a non-negative integer: %s\n' \
        "$min_kib" 1>&2
      exit 2
      ;;
  esac

  printf 'Finding big dirs in “%s”; results in “%s”\n' "$dir" "$log" 1>&2

  # Will assume ‘.’ in search for cleaner output paths.
  # Bail out if the directory is unusable rather than scanning (and
  # logging) whatever directory we happen to be in.
  cd -- "$dir" || exit 1

  # See semblance of result for debugging.
  #du --max-depth 1 --one-file-system -- * | sort -nr

  ### Find dirs at least MIN_KIB big.
  # Throwing away some nice size output, but this makes the processing
  # simple.
  #  - "--" keeps entries whose names start with "-" from being read as
  #    options.
  #  - Only lines containing "/" are kept, i.e. second-level paths; the
  #    top-level entries themselves are dropped.
  #  - Everything after the first tab is the path, so paths that contain
  #    tabs survive intact.
  du --max-depth 1 --one-file-system -- * \
    | sort -nr \
    | awk -F'\t' -v min_kib="$min_kib" \
        '$1 + 0 >= min_kib + 0 && index($0, "/") {
           print substr($0, index($0, "\t") + 1)
         }' \
    | tee -- "$log"
)

find_big_dirs "$@"
#! /bin/bash
# Find unwanted big files (and misplaced archives).
#
# Usage: bigfiles [DIR]
#   DIR  directory to scan (default: current directory)
#
# Matching paths are printed on stdout, relative to DIR, and are also saved
# to bigfiles.log in the directory the script was started from.  The
# progress message goes to stderr so stdout stays pipeable.
#
# Author: Micah Elliott <mde MicahElliott com>

# find_big_files [DIR]
# The body is a subshell so the cd below never leaks into the caller.
# Returns non-zero if DIR cannot be entered.
find_big_files() (
  dir=${1:-$PWD}
  #dt=$(date "+%Y%m%d")
  #log="bigfiles.$dt.log"
  # Resolved before the cd so the log lands where the user started.
  log="$PWD/bigfiles.log"

  printf 'Finding big files in “%s”; results in “%s”\n' "$dir" "$log" 1>&2

  # Will use ‘.’ in find for cleaner paths.
  # Bail out if the directory is unusable rather than scanning (and
  # logging) whatever directory we happen to be in.
  cd -- "$dir" || exit 1

  # Two groups of regular files are reported:
  #   1. build products / data files, but only when bigger than 10 MB
  #      (10,000,000 bytes);
  #   2. archives, core dumps, editor backups and similar leftovers,
  #      whatever their size.
  # Every name test inside a group must be joined with -o: find treats
  # adjacent tests as an implicit AND, which would make the tail of the
  # list unmatchable.
  find . \
    -type f -a \( \
      -size +10000000c -a \( \
        -name '*.so' -o \
        -name '*.a' -o \
        -name '*.x' -o \
        -name '*.exe' -o \
        -name '*.o' -o \
        -name '*.dat' -o \
        -name '*.in' \
      \) \
      -o \( \
        -iname '*.rpm' -o \
        -iname '*.tar' -o \
        -iname '*.gz' -o \
        -iname '*.tgz' -o \
        -iname '*.taz' -o \
        -iname '*.bz2' -o \
        -iname '*.Z' -o \
        -iname '*.iso' -o \
        -name 'core' -o \
        -iname '*.bin' -o \
        -name '*~' -o \
        -name '*.sw[po]' -o \
        -iname '*.zip' -o \
        -iname '*.gzip' -o \
        -iname '*.bak' -o \
        -iname '*.orig' -o \
        -name '*.pyc' \
      \) \
    \) \
    -printf "%P\n" \
    | tee -- "$log"
)

find_big_files "$@"
#! /bin/zsh
# Find temporary files (and unwanted archives).
#
# Usage: tmpfiles [DIR]
#   DIR  directory to scan (default: current directory)
#
# Matching paths are printed on stdout, relative to DIR.  The progress
# message goes to stderr so stdout stays pipeable.
#
# Author: Micah Elliott <mde MicahElliott com>

# find_tmp_files [DIR]
# The body is a subshell so the cd below never leaks into the caller.
# Returns non-zero if DIR cannot be entered.
find_tmp_files() (
  dir=${1:-$PWD}

  # File-name globs (find -name syntax) considered temporary or unwanted.
  tmptypes=(
    '*.so'
    '*.a'
    '*.x'
    '*.exe'
    '*.o'
    '*.dat'
    '*.in'
    '*.rpm'
    '*.tar'
    '*.gz'
    '*.tgz'
    '*.taz'
    '*.bz2'
    '*.Z'
    '*.iso'
    'core'
    '*.bin'
    '*~'
    '*.sw[po]'
    '*.zip'
    '*.gzip'
    '*.bak'
    '*.orig'
    '*.pyc'
  )

  # Build the find expression as an array, one word per element, so each
  # glob reaches find exactly as written: the shell neither expands it nor
  # re-splits it, even if a pattern contains whitespace.  The quoted
  # "${array[@]}" form behaves the same in zsh and bash.
  name_tests=()
  for t in "${tmptypes[@]}"; do
    name_tests+=( -o -name "$t" )
  done

  echo "Finding temporary files." 1>&2

  # Bail out if the directory is unusable rather than scanning whatever
  # directory we happen to be in.
  cd -- "$dir" || exit 1

  # The leading -false gives every "-o -name PATTERN" pair something to
  # attach to without matching any file itself.
  find . -type f -a \( -false "${name_tests[@]}" \) -printf "%P\n"
)

find_tmp_files "$@"

# Sort numerically . This needs to be piped into from above 'find'.
#ls -l $(cat $file) | sed -e 's/[ ]\+/ /g' | cut -f5,9 -d" " | sort -nr > $file
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment