Last active
January 19, 2024 15:48
-
-
Save deanhouseholder/36d788fd795f94a40af216670b00552a to your computer and use it in GitHub Desktop.
Bash Search Function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Recursive File Contents Search function | |
# $1 = Search string (or -i to see list of ignored filetypes) | |
# $2 = (optional) File pattern (ex: *.js) (default is: *) | |
# $3 = (optional) Set to 1 for case-insensitive search (default is: 0 (case-sensitive)) | |
# $4 = (optional) Comma-separated list of directories to ignore (format is: ".git,vendor,node_modules,bin") | |
# Bug: Backslashes are not displaying in results. (ex: "\n" shows up as "n") | |
function search(){ | |
# Define Vars | |
local sep=$'\x01' # Obscure ascii character not likely to appear in files | |
local col_spacing=3 | |
local bold='\e[1m' | |
local end='\e[0m' | |
local green='\e[32m' | |
local purple='\e[35m' | |
local start_red_hex='1b5b313b33316d' # printf '\e[1;31m' | xxd -p | |
local stop_red_hex='1b5b306d' # printf '\e[0m' | xxd -p | |
local filter_swap_separators="s/^([^:]*):([^:]+):\s*(.*)$/\2$sep\1$sep\3/g" | |
local col_line_w=0 # Column containing the line number's max width | |
local col_path_w=0 # Column containing the file path's max width | |
local fixed_strings='--fixed-strings ' | |
local search name case_sensitive find_array col_line col_path col_data error message usage filetypes_to_ignore escaped_search search_hex replace_hex ignore_paths count ignore_filetypes clip_cols | |
# Ignore certain binary filetypes to speed up searching | |
filetypes_to_ignore=(3gp 7z aac apng avi avif bmp class dll doc docx exe flac gif gpg gz gzip ico iso jar jfif jpeg jpg jrb mdb mkv mov mp3 mp4 mpeg odt ogg pdf pgp pgp_ png pp ppt pptx pyc rar 'so.*' sqlite svg tar tar tif tiff vob wav webm webp wma wmv xls xlsx zip) | |
# Clip long results somewhere near the end of users' screen | |
clip_cols=$(($COLUMNS-5)) | |
# Check for missing input | |
if [[ -z "$1" ]]; then | |
error="${bold}\e[37m\e[41m" | |
message="${bold}\e[36m" | |
usage="\n${bold}Recursive File Search${end}\n\n" | |
usage+="${error}Error: %s${end}\n\n${message}Usage:\n${end}" | |
usage+="search SEARCH_PATTERN [FILE_PATTERN] [CASE_INSENSITIVE] [IGNORE_DIRS]\n\n" | |
usage+="${message}Parameters:${end}\n" | |
usage+="SEARCH_PATTERN The string to be matched\n" | |
usage+="FILE_PATTERN File matching pattern such as '*.php'\n" | |
usage+="CASE_INSENSITIVE 0 for case-sensitive search (default), 1 for case-insensitive\n" | |
usage+="IGNORE_DIRS Comma-separated list of directories to ignore. Use '*/dirname' to exclude in nested directories.\n\n" | |
usage+="${message}Examples:${end}\n" | |
usage+="search '.ajax' '*.js' 1\n" | |
usage+="search 'Fatal Error:' '*.log'\n" | |
usage+="search '<div class=\"cart\">' '*' 0 '.git,vendor,node_modules,bin'\n\n" | |
usage+="${message}Note:${end}\n" | |
usage+="You can also pass in '-i' as the first parameter to see a list of ignored filetypes.\n\n" | |
printf "${usage}" "No search string given" | |
return 1 | |
fi | |
# If user passes '-i' as the first parameter, show a list of the ignored filetypes | |
if [[ "$1" == '-i' ]]; then | |
printf "Filetypes that are ignored:\n" | |
out="$(printf ".%s, " "${filetypes_to_ignore[@]}")" | |
printf "%s\n\n" "$(echo $out | sed -E 's/(.*),/\1/')" | |
return 1 | |
fi | |
# Process user input | |
search="$1" | |
[[ -z "$2" ]] && name='*' || name="$2" | |
[[ "$name" == "*." ]] && name='*' | |
[[ "$3" == "1" ]] && case_sensitive='i' | |
# Build out switches to ignore directories | |
if [[ -n "$4" ]]; then | |
IFS=',' read -r -a paths_to_ignore <<< "$4" | |
# Expand out the array of paths to match this syntax: | |
# \( -path "tmp/*" -o -path ".git/*" \) -prune -o | |
ignore_paths='\(' | |
for path in "${paths_to_ignore[@]}"; do | |
ignore_paths+=" -path \"${path}/*\" -o" | |
done | |
ignore_paths="$(echo $ignore_paths | sed -E 's/(.*)\-o/\1/')" # Trim off the last '-o' | |
ignore_paths+='\) -prune -o' | |
fi | |
# Expand out the array of filetypes to ignore to match this syntax: | |
# \( -name '*.gif' -o -name '*.png' \) -prune -o | |
ignore_filetypes='\(' | |
for filetype in "${filetypes_to_ignore[@]}"; do | |
ignore_filetypes+=" -name '*.${filetype}' -o" | |
done | |
ignore_filetypes="$(echo $ignore_filetypes | sed -E 's/(.*)../\1/')" # Trim off the last '-o' | |
ignore_filetypes+='\) -prune -o' | |
# Escape any special characters in search input for safety with grep | |
escaped_search="$(printf -- '%s' "$search" | sed -e 's/\"/\\"/g' -e 's/`/\\`/g')" | |
# Handle special case if search is just a semicolon | |
if [[ "$search" == ';' ]]; then | |
fixed_strings='' | |
escaped_search='\;' | |
fi | |
# To avoid difficulty allowing all search characters without sed confusing them for regex characters, | |
# convert search input to hex characters where replace is simple. | |
search_hex="$(printf -- "%s" "$search" | xxd -p -c 1000000)" # Convert search into hex | |
replace_hex="$start_red_hex$search_hex$stop_red_hex" # Build replacement string in hex | |
# Perform search and capture the results into an array | |
# Match all paths except for ones that start with '..' (captures '.abc' and '..abc' as files/dirs) | |
# Searching this way gets rid of the leading './' | |
mapfile find_array < <( \ | |
eval "find * .[^.]* ..?* $ignore_paths $ignore_filetypes -type f -name '$name' -exec \ | |
grep -${case_sensitive}nH --color=never $fixed_strings -- \"$escaped_search\" {} + \ | |
2>/dev/null | grep -v -- '^Binary' | cut -c 1-$clip_cols | uniq | sed -r -e '$filter_swap_separators'" \ | |
) | |
# Loop through the first time to determine max column widths and total count | |
count=0 | |
while read -r line; do | |
while IFS="$sep" read -r col_line col_path col_data; do | |
[[ -n "$col_data" ]] && ((count ++)) | |
[[ $col_line_w -lt ${#col_line} ]] && col_line_w=${#col_line} | |
[[ $col_path_w -lt ${#col_path} ]] && col_path_w=${#col_path} | |
done < <(echo "${line[@]}") | |
done < <(echo "${find_array[@]}") | |
# Begin display results | |
if [[ $count -eq 0 ]]; then | |
printf "\nNo matches found\n\n" | |
else | |
# Add some padding | |
((col_line_w += col_spacing)) | |
if [[ $col_line_w -lt $((col_spacing + 4)) ]]; then | |
col_line_w=$((col_spacing + 4)) # Because the heading "Line" is 4 chars, make it at least that long | |
fi | |
((col_path_w += col_spacing)) | |
# Print heading | |
printf "\n${bold}%-${col_line_w}s%-${col_path_w}s%s${end}\n" "Line" "File Path" "Search Results" | |
printf "${bold}%-${col_line_w}s%-${col_path_w}s%s${end}\n" "----" "---------" "--------------" | |
# Loop through again to display output in columns | |
while read -r line; do | |
while IFS="$sep" read -r col_line col_path col_data; do | |
if [[ -n "$col_data" ]]; then | |
# Add color to search string in results (Do search/replace in hex mode and then swap back) | |
col_data="$(printf -- "%s" "$col_data" | xxd -p -c 1000000 | sed -- "s/$search_hex/$replace_hex/g" | xxd -p -r)" | |
printf -- "${green}%-${col_line_w}s$end${purple}%-${col_path_w}s$end%s\n" "$col_line" "$col_path" "${col_data//^\w/}" | |
fi | |
done < <(echo "${line[@]}") | |
done < <(echo "${find_array[@]}") | |
printf "\nMatches found: %s\n\n" "$count" | |
fi | |
} | |
function se(){ search "$1" '*.'"$2" 0 "$3"; } # Search shortcut which puts in the *. prefix to a filetype for you | |
function si(){ search "$1" '*.'"$2" 1 "$3"; } # Case-insensitive shortcut function | |
function sphp(){ search "$1" '*.php' "$2" "$3"; } # Search PHP files | |
function scss(){ search "$1" '*.css' "$2" "$3"; } # Search CSS files | |
function sjs(){ search "$1" '*.js' "$2" "$3"; } # Search JavaScript files | |
# Search for a count of matches within each file | |
function searchcount(){ | |
local matches | |
matches="$(command grep -RHn "$1" 2>/dev/null | grep -v '^Binary' | cut -d: -f1 | uniq -c)" | |
printf "Matches\tFilename\n-----\t--------------------------------\n%s\n" "$matches" | column -t | |
} | |
# Search for a count of case-insensitive matches within each file | |
function searchcounti(){ | |
local matches | |
matches="$(command grep -RHni "$1" 2>/dev/null | grep -v '^Binary' | cut -d: -f1 | uniq -c)" | |
printf "Matches\tFilename\n-----\t--------------------------------\n%s\n" "$matches" | column -t | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
These Bash functions search for contents within files starting from the current directory recursively. You need to source it initially then just invoke the functions. It is recommended that you add this to your bash startup files such as .bashrc or .bash_profile.