Last active
September 26, 2023 16:36
-
-
Save mttjohnson/9ddbe73a8819eda10aa4f68f51d0884a to your computer and use it in GitHub Desktop.
file recursive research
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find the 25 biggest .png files below the current directory (case-insensitive name match)
# referenced from https://www.cyberciti.biz/faq/how-do-i-find-the-largest-filesdirectories-on-a-linuxunixbsd-filesystem/
# -printf '%s %p\n' emits "<size in bytes> <path>"; sort -nr puts the largest first
find . -type f -iname "*.png" -printf '%s %p\n' | sort -nr | head -25
# List all symlinks recursively in a directory (path -> target, or full ls-style details)
find ./ -type l -printf "%p -> %l\n"
find . -type l -ls
# List all broken symlinks
# The path is handed to sh as "$1" instead of being spliced into the command
# string, so names containing spaces, quotes or $ are handled safely.
find . -type l -exec sh -c 'file -b "$1" | grep -q "^broken"' sh {} \; -print
# With -L find follows symlinks, so -type l only matches links whose target is missing
find -L . -type l
# Get the fully qualified / absolute path of the current directory (-P resolves symlinks)
pwd -P
# Get the fully qualified / absolute path of a specific path
realpath ./
perl -MCwd -le 'print Cwd::abs_path(shift)' "/sites"
# List any files whose name ends with a specific extension
find . -type f -name "*.htaccess"
# Newest file by status-change time (%C), with or without the timestamp
# The timestamp is printed as YYYY-MM-DDTHH:MM:SS, which orders correctly as
# plain text, so a reverse text sort puts the newest entry first.
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' | sort -r | head -1
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' | sort -r | head -1 | cut -f2- -d" "
# Newest file by modification time (%T), with the timestamp
find . -type f -printf '%TY-%Tm-%TdT%TH:%TM:%TS %p\n' | sort -r | head -1
# Oldest file by status-change time (%C), with or without the timestamp
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' | sort -r | tail -1
find . -type f -printf '%CY-%Cm-%CdT%CH:%CM:%CS %p\n' | sort -r | tail -1 | cut -f2- -d" "
# Largest file (with or without additional details)
find . -type f -printf "%s %p\n" | sort -rn | head -1 | cut -f2- -d" "
# -d '\n' keeps a path containing spaces as one argument; -r skips ls entirely
# when no file was found (otherwise ls would list the current directory).
find . -type f -printf "%s %p\n" | sort -rn | head -1 | cut -f2- -d" " | xargs -r -d '\n' ls -lh
# Efficiently working with slow servers that contain lots of files in a directory (millions)
# Get the total recursive file count (not that accurate, but it's at least close)
# The -R causes it to list files/directories recursively (may count directories 3 times)
# The -U disables sorting (entries are listed in directory order)
# The -A shows hidden files as well, but omits . and ..
# The -q replaces special control characters in filenames with a ?
# The -1 forces output of one entry per line
ls -RUAq1 | wc -l
# Get the total recursive size of the current path
du -hs
# Get the count of files and total recursive size of a specific path
find /www/uploads/ -type f | wc -l
du -hs /www/uploads/
# Sum the apparent size in bytes of regular files only (NUL-delimited, so odd
# filenames are safe), keep the "total" line, and print it in IEC units.
find /www/uploads/ -type f -print0 | du -bc --files0-from=- | tail -1 | cut -d$'\t' -f 1 | numfmt --to=iec
# Write the relative paths of everything below the current directory into a file
# (the output file is created first, so it appears in its own listing)
find . > temp_file_list
# List all files sorted by file size, displaying the largest file last
# -S order by file size
# -r reverse order
# -h human readable (G/M/K of bytes)
ls -lSrh
# Count of files changed in the last 24 hours (-cmin -1440 = status change less than 1440 minutes ago)
find /var/www/site1/ -type f -cmin -1440 | wc -l
# Total size of files changed in the last 24 hours
find /var/www/site1/ -type f -cmin -1440 -print0 | du -bc --files0-from=- | tail -1 | cut -d$'\t' -f 1 | numfmt --to=iec
# Count of just files in directory (recursively)
find /var/www/site1/ -type f | wc -l
# Count of just directories in directory (recursively)
find /var/www/site1/ -type d | wc -l
# Count millions of files in a single directory
# The -U disables sorting (entries are listed in directory order)
# The -A shows hidden files as well, but omits . and ..
# The -q replaces special control characters in filenames with a ?
# The -1 forces output of one entry per line
ls -UAq1 /sites/ | wc -l
# List details for files changed in the last 24 hours, sorted by modified time
# -print0 / xargs -0 keep paths containing whitespace intact, and -r prevents
# ls from listing the current directory when no file matched.
find . -type f -cmin -1440 -print0 | xargs -0 -r ls -lat
# Get count of all files in list of paths if paths exist
# Each existing directory contributes one count line; paste joins the counts
# with "+" and bc evaluates the sum. Blank lines and missing paths are skipped.
echo "
/var/www/site1/
/var/www/site2/
" | while read -r line
do
    [ -n "$line" ] && [ -d "$line" ] && find "$line" -type f | wc -l
done | paste -sd+ - | bc
# Count of files changed in the last 24 hours across a list of paths
# Each existing directory contributes one count line; paste joins the counts
# with "+" and bc evaluates the sum. Blank lines and missing paths are skipped.
echo "
/var/www/site1/
/var/www/site2/
/var/www/site3/
/var/www/site4/
" | while read -r line
do
    [ -n "$line" ] && [ -d "$line" ] && find "$line" -type f -cmin -1440 | wc -l
done | paste -sd+ - | bc
# List files changed in the last 5 minutes across a list of paths
# Blank lines and paths that are not existing directories are skipped.
echo "
/var/www/site1/
/var/www/site2/
/var/www/site3/
/var/www/site4/
" | while read -r line
do
    [ -n "$line" ] && [ -d "$line" ] && find "$line" -type f -cmin -5
done
# Note that find -cmin checks the status-change timestamp, not just the modified timestamp.
# stat prints a file's timestamps so the two can be compared.
stat myfile.txt
# List all files owned by the current (or specific) user
# "Permission denied" messages are filtered out of stderr. -print0 / xargs -0
# keep unusual filenames intact, and ls -d shows a matched directory itself
# rather than its contents (which may belong to other users).
find /tmp/ -user "$USER" -print0 2> >(grep -v 'Permission denied' >&2) | xargs -0 -r ls -lahd
find /tmp/ -user "$USER" 2> >(grep -v 'Permission denied' >&2) | wc -l
# List all files owned by the current (or specific) user (older than 7 days old)
find /tmp/ -user "$USER" -mtime +7 -print0 2> >(grep -v 'Permission denied' >&2) | xargs -0 -r ls -lahd
find /tmp/ -user "$USER" -type f -mtime +7 2> >(grep -v 'Permission denied' >&2) | wc -l
# List all files owned by the current (or specific) user (newer than 7 days old)
find /tmp/ -user "$USER" -mtime -7 -print0 2> >(grep -v 'Permission denied' >&2) | xargs -0 -r ls -lahd
find /tmp/ -user "$USER" -mtime -7 2> >(grep -v 'Permission denied' >&2) | wc -l
# Delete files older than 7 days
# -type f restricts the match to regular files so rm is never handed a directory
find /tmp/asdf -type f -mtime +7 -exec rm -- {} \;
# Same result using find's built-in -delete; the cd guards against a missing directory
cd /tmp/asdf && find /tmp/asdf -type f -mtime +7 -delete
# Create a bunch of test files with different modification dates
# Each file is named test-file-YYYYMMDD and stamped with that same date. Only
# the date is kept, so the -1hour offset usually lands on today's date.
for offset in +1day -1hour -1day -2day -5day -6day -7day -8day -11day -45day -1month -1year
do
    stamp=$(date +%Y%m%d -d "$offset")
    touch -d "$stamp" "test-file-$stamp"
done
# Finding disk space used by deleted files that is not reported by "du"
# We could see "df -h" reporting space used, but when trying to locate what exactly
# was using the space with "du -hd 1" we could not locate the file. Checking
# for open deleted files we found Apache holding on to a 20GB log file that had been
# deleted but never released. Restarting Apache released the deleted file, so the
# space was reported by "df -h" as available again.
# Might need to run as root to see everything.
lsof | grep deleted
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment