Skip to content

Instantly share code, notes, and snippets.

@mttjohnson
Last active September 26, 2023 16:36
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mttjohnson/9ddbe73a8819eda10aa4f68f51d0884a to your computer and use it in GitHub Desktop.
Save mttjohnson/9ddbe73a8819eda10aa4f68f51d0884a to your computer and use it in GitHub Desktop.
file recursive research
# Show the 25 largest .png files (case-insensitive match) below the current directory.
# Every output line is "<size in bytes> <path>", biggest first.
# referenced from https://www.cyberciti.biz/faq/how-do-i-find-the-largest-filesdirectories-on-a-linuxunixbsd-filesystem/
find . -type f -iname '*.png' -printf '%s %p\n' \
  | sort -rn \
  | head -n 25
# Recursively list every symbolic link below the current directory.
# Form 1 prints one "link -> target" line per symlink.
find ./ -type l -printf '%p -> %l\n'
# Form 2 prints an ls-style detail line per symlink.
find . -type l -ls
# List all broken symlinks
# Variant 1: ask file(1) about every symlink and print those it reports as broken.
# The path is handed to sh as "$1" instead of being spliced into the command
# string, so names containing spaces, quotes or shell metacharacters can neither
# break the command nor inject code into it.
find . -type l -exec sh -c 'file -b "$1" | grep -q ^broken' sh {} \; -print
# Variant 2: with -L find follows symlinks, so anything still reported as
# type l is a link that could not be resolved.
find -L . -type l
# Get the fully qualified path / absolute path of the current directory
# (-P prints the physical directory, with symbolic links resolved)
pwd -P
# Get the fully qualified path / absolute path of a specific path
# Two alternatives: realpath, or Perl's Cwd::abs_path applied to its first argument
realpath ./
perl -MCwd -le 'print Cwd::abs_path(shift)' "/sites"
# List any regular files whose name ends with a specific extension
# (here every name ending in ".htaccess", which includes a file named exactly ".htaccess")
find . -type f -name "*.htaccess"
# Newest file by status-change time (ctime; find's %C directives).
# Each line is "<timestamp> <path>"; a reverse sort puts the newest entry first.
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' \
  | sort -rn \
  | head -n 1
# Same lookup, printing only the path (the leading timestamp field is cut off).
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' \
  | sort -rn \
  | head -n 1 \
  | cut -d ' ' -f 2-
# Newest file by modification time (mtime; find's %T directives), with timestamp.
find . -type f -printf '%TY-%Tm-%TdT%TH:%TM:%TS %p\n' \
  | sort -rn \
  | head -n 1
# Oldest file by status-change time: same reverse sort, but take the last line.
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' \
  | sort -rn \
  | tail -n 1
# Oldest file, path only.
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' \
  | sort -rn \
  | tail -n 1 \
  | cut -d ' ' -f 2-
# Largest File (with or without additional details)
# Path only: lines are "<size in bytes> <path>", sorted biggest first, size field cut off.
find . -type f -printf '%s %p\n' \
  | sort -rn \
  | head -n 1 \
  | cut -d ' ' -f 2-
# With ls details. xargs is told to split on newlines only (-d '\n') so a path
# containing spaces or quotes reaches ls as one argument, and -r skips ls
# entirely when nothing was found (a bare "ls -lh" would list the current directory).
find . -type f -printf '%s %p\n' \
  | sort -rn \
  | head -n 1 \
  | cut -d ' ' -f 2- \
  | xargs -r -d '\n' ls -lh --
# Efficiently working with slow servers that contain lots of files in a directory (millions)
# Get the total recursive file count (not exact, but at least close)
# The -R lists files/directories recursively (a subdirectory can add up to 3 lines:
#   its entry in the parent listing, its "dir:" header line and a blank separator line)
# The -U disables sorting (entries are listed in directory order)
# The -A shows hidden files as well but omits . and ..
# The -q replaces special control characters in filenames with a ?
# The -1 forces output of one entry per line
ls -RUAq1 | wc -l
# Get the total recursive size of the current directory (du is given no path here)
# -s prints a single summary total, -h uses human readable units
du -hs
# Number of regular files, and total recursive size, of one specific path
find /www/uploads/ -type f | wc -l
du -sh /www/uploads/
# Total size of the regular files only: NUL-delimited paths are fed to du,
# the final "total" line is kept, its first tab-separated field (the byte count)
# is extracted and converted to IEC units.
find /www/uploads/ -type f -print0 \
  | du -bc --files0-from=- \
  | tail -n 1 \
  | cut -f 1 \
  | numfmt --to=iec
# Write the relative path of everything below the current directory (recursively)
# into temp_file_list, one path per line
find . -print >temp_file_list
# List the entries of the current directory sorted by file size, largest last
# -l long listing format
# -S order by file size
# -r reverse order (so the largest entry ends up at the bottom)
# -h human readable (G/M/K of bytes)
ls -lSrh
# Number of regular files whose status changed within the last 24 hours (1440 minutes)
find /var/www/site1/ -type f -cmin -1440 | wc -l
# Combined size of those files: NUL-delimited paths go to du, the closing
# "total" line is kept, its byte count extracted and shown in IEC units.
find /var/www/site1/ -type f -cmin -1440 -print0 \
  | du -bc --files0-from=- \
  | tail -n 1 \
  | cut -f 1 \
  | numfmt --to=iec
# count of just regular files in directory (recursively)
find /var/www/site1/ -type f | wc -l
# count of just directories in directory (recursively); the starting directory itself is included
find /var/www/site1/ -type d | wc -l
# count millions of files in a single directory (not recursive)
# The -U disables sorting (entries are listed in directory order)
# The -A shows hidden files as well but omits . and ..
# The -q replaces special control characters in filenames with a ?
# The -1 forces output of one entry per line
ls -UAq1 /sites/ | wc -l
# list details for recently changed files (status changed in the last 24 hours)
# sorted by modified time, newest first
# The paths are streamed NUL-delimited to xargs instead of being expanded with an
# unquoted $(find ...): names containing whitespace or glob characters stay intact,
# and -r prevents ls from running (and listing the current directory) when no file
# matches. With an extremely long list xargs may run ls more than once, in which
# case the sorting applies per batch.
find . -type f -cmin -1440 -print0 | xargs -0 -r ls -lat --
# Get count of all files in list of paths if paths exist
# Every listed path that is an existing directory contributes one file count;
# the counts are joined with "+" and summed by bc. The trailing 0 keeps the
# expression well-formed, so 0 is printed when none of the paths exist.
# read -r keeps backslashes literal and the quoted "$dir" survives spaces in a path.
{
  printf '%s\n' \
    /var/www/site1/ \
    /var/www/site2/ \
    | while read -r dir; do
      if [ -d "$dir" ]; then
        find "$dir" -type f | wc -l
      fi
    done
  printf '0\n'
} | paste -sd+ - | bc
# count of files changed in 24 hours (status change within the last 1440 minutes)
# Every listed path that is an existing directory contributes one count; the
# counts are joined with "+" and summed by bc. The trailing 0 keeps the
# expression well-formed, so 0 is printed when none of the paths exist.
# read -r keeps backslashes literal and the quoted "$dir" survives spaces in a path.
{
  printf '%s\n' \
    /var/www/site1/ \
    /var/www/site2/ \
    /var/www/site3/ \
    /var/www/site4/ \
    | while read -r dir; do
      if [ -d "$dir" ]; then
        find "$dir" -type f -cmin -1440 | wc -l
      fi
    done
  printf '0\n'
} | paste -sd+ - | bc
# list files changed in last 5 minutes
# Paths that are not existing directories are skipped silently.
# read -r keeps backslashes literal and the quoted "$dir" survives spaces in a path.
printf '%s\n' \
  /var/www/site1/ \
  /var/www/site2/ \
  /var/www/site3/ \
  /var/www/site4/ \
  | while read -r dir; do
    if [ -d "$dir" ]; then
      find "$dir" -type f -cmin -5
    fi
  done
# Note that find -cmin checks the status change timestamp (ctime), not just the
# modified timestamp (mtime); stat prints a file's timestamps so they can be compared
stat myfile.txt
# List all files owned by the current (or specific) user
# - "Permission denied" messages from unreadable directories are filtered out of stderr.
# - For the listings, paths travel NUL-delimited (-print0 / xargs -0) so names with
#   spaces or quotes reach ls intact; -r skips ls when nothing matched.
# - ls -d shows a matched directory's own entry instead of its contents, which
#   may belong to other users.
find /tmp/ -user "$USER" -print0 2> >(grep -v 'Permission denied' >&2) | xargs -0 -r ls -lahd --
find /tmp/ -user "$USER" 2> >(grep -v 'Permission denied' >&2) | wc -l
# List all files owned by the current (or specific) user (older than 7 days old)
# NOTE(review): the count below is restricted to regular files (-type f) while the
# listing is not - confirm which one is intended.
find /tmp/ -user "$USER" -mtime +7 -print0 2> >(grep -v 'Permission denied' >&2) | xargs -0 -r ls -lahd --
find /tmp/ -user "$USER" -type f -mtime +7 2> >(grep -v 'Permission denied' >&2) | wc -l
# List all files owned by the current (or specific) user (newer than 7 days old)
find /tmp/ -user "$USER" -mtime -7 -print0 2> >(grep -v 'Permission denied' >&2) | xargs -0 -r ls -lahd --
find /tmp/ -user "$USER" -mtime -7 2> >(grep -v 'Permission denied' >&2) | wc -l
# Delete files older than 7 days
# Variant 1: hand the matches to rm. -type f restricts the match to regular files,
# so rm is never run on a directory (such as /tmp/asdf itself), and "{} +"
# batches many paths into each rm invocation instead of starting one rm per file.
find /tmp/asdf -type f -mtime +7 -exec rm -- {} +
# Variant 2: let find remove the files itself with -delete (no rm processes at all)
cd /tmp/asdf && find /tmp/asdf -type f -mtime +7 -delete
# Create a bunch of test files with different dates: for each relative offset,
# a file named test-file-YYYYMMDD is created in the current directory and
# touch -d sets its timestamp to that same day.
for offset in +1day -1hour -1day -2day -5day -6day -7day -8day -11day -45day -1month -1year; do
  stamp=$(date +%Y%m%d -d "$offset")
  touch -d "$stamp" "test-file-$stamp"
done
# Finding disk space held by deleted files that is not reported by "du"
# We could see "df -h" reporting space used, but when trying to locate what exactly
# was using the space with "du -hd 1" we could not locate the file. Checking
# for open deleted files we found Apache holding on to a 20GB log file that had been
# deleted but never released. Restarting Apache released the deleted file, so the
# space was reported by "df -h" as available again.
# Might need to run as root to see everything.
# The command lists open files and keeps only the lines containing "deleted".
lsof | grep deleted
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment