Last active
July 9, 2024 09:13
-
-
Save redpop/143578c31459f2e7774ec6c0c31f33c6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#######################################
# Print the command-line synopsis and option summary, then terminate.
# Globals:   $0 (read) - shown as the program name in the synopsis
# Arguments: none
# Outputs:   five lines of usage text on stdout
# Returns:   does not return; exits the script with status 1
#######################################
usage() {
  printf '%s\n' \
    "Usage: $0 source_directory [-o output_file] [-e exclude_dirs] [-x exclude_exts]" \
    " source_directory Source directory to be processed" \
    " -o output_file Optional output file name (default: output.txt)" \
    " -e exclude_dirs Optional comma-separated list of directories to exclude" \
    " -x exclude_exts Optional comma-separated list of file extensions to exclude"
  exit 1
}
# The source directory is a mandatory first positional argument that comes
# before any option flags; without it there is nothing to do.
if [[ $# -eq 0 ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Error: Source directory is required" >&2
  usage
fi
# Take the source directory from the first argument, then drop it so that
# option parsing sees only the flags that follow.
sourcedir=$1
shift
# Default values used when the corresponding option is not given.
outputfile="output.txt"
excludedirs=()
excludeexts=()
# Parse the option flags. The leading ':' in the optstring selects getopts'
# silent mode: a missing argument is reported as ':' and an unknown flag as
# '?', and in both cases OPTARG holds the offending option letter, so the
# messages below can name it.
while getopts ":o:e:x:" opt; do
  case "$opt" in
    o)
      outputfile=$OPTARG
      ;;
    e)
      # Split the comma-separated list into an array.
      IFS=',' read -r -a excludedirs <<< "$OPTARG"
      ;;
    x)
      IFS=',' read -r -a excludeexts <<< "$OPTARG"
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      usage
      ;;
    \?)
      echo "Unknown option: -$OPTARG" >&2
      usage
      ;;
  esac
done
# Discard the options that were consumed above.
shift $((OPTIND - 1))
# Refuse to continue unless the source path is an existing directory.
if [[ ! -d "$sourcedir" ]]; then
  # Report on stderr so the error is visible even when stdout is redirected.
  echo "Error: Source directory '$sourcedir' does not exist" >&2
  exit 1
fi
# If the output file already exists, ask before overwriting it.
if [[ -f "$outputfile" ]]; then
  # -r keeps backslashes in the reply literal.
  read -r -p "Output file '$outputfile' already exists. Overwrite? [Y/n]: " answer
  # An empty reply (just Enter) counts as 'Y'. Matching is case-insensitive
  # and accepts both "y" and "yes"; anything else cancels.
  case "${answer:-Y}" in
    [Yy] | [Yy][Ee][Ss]) ;;
    *)
      echo "Operation cancelled"
      exit 1
      ;;
  esac
fi
#######################################
# Check whether a directory is on the exclusion list.
# A directory is excluded when an entry equals either its path as given or
# its final path component, so "-e node_modules" also matches
# "./src/node_modules". Trailing slashes on either side are ignored and
# empty list entries are skipped.
# Globals:   excludedirs (read) - array of excluded directory names/paths
# Arguments: $1 - directory path to test
# Returns:   0 if the directory is excluded, 1 otherwise
#######################################
is_excluded_dir() {
  local dir="$1" excluded name
  # Normalise "build/" to "build" (but leave a bare "/" alone).
  while [[ "$dir" == */ && "$dir" != "/" ]]; do
    dir=${dir%/}
  done
  name=${dir##*/}
  for excluded in "${excludedirs[@]}"; do
    while [[ "$excluded" == */ && "$excluded" != "/" ]]; do
      excluded=${excluded%/}
    done
    [[ -n "$excluded" ]] || continue
    if [[ "$dir" == "$excluded" || "$name" == "$excluded" ]]; then
      return 0
    fi
  done
  return 1
}
#######################################
# Check whether a file name ends in one of the excluded extensions.
# List entries may be written with or without a leading dot ("txt" or
# ".txt"). Entries are compared literally, never as glob patterns, and
# empty entries are skipped.
# Globals:   excludeexts (read) - array of excluded extensions
# Arguments: $1 - file path to test
# Returns:   0 if the file's extension is excluded, 1 otherwise
#######################################
is_excluded_ext() {
  local file="$1" ext
  for ext in "${excludeexts[@]}"; do
    ext=${ext#.}
    [[ -n "$ext" ]] || continue
    # Quoting "$ext" makes the suffix literal; only the leading * is a glob.
    if [[ "$file" == *."$ext" ]]; then
      return 0
    fi
  done
  return 1
}
#######################################
# Decide whether a file should be treated as text, based on the
# description reported by the `file` utility.
# Arguments: $1 - path of the file to inspect
# Returns:   0 if the description mentions "text" or "JSON", 1 otherwise
#            (including when `file` itself fails)
#######################################
is_text_file() {
  local file="$1" description
  # -b omits the file name from the output, so a path that happens to
  # contain "text" or "JSON" cannot cause a false positive; "--" protects
  # names that begin with a dash.
  description=$(file -b -- "$file") || return 1
  [[ "$description" == *text* || "$description" == *JSON* ]]
}
#######################################
# Recursively walk a directory and append every accepted text file to the
# output file, each preceded by a header naming the file.
# Hidden entries, excluded directories, excluded extensions, non-text
# files and the output file itself are skipped.
# Globals:   outputfile (read) - path of the file being appended to
# Arguments: $1 - directory to process
# Outputs:   progress messages on stdout; file contents into $outputfile
# Returns:   status of the last command executed
#######################################
process_directory() {
  # Locals keep each recursion level's loop state private.
  local dir="$1" item name
  if is_excluded_dir "$dir"; then
    return
  fi
  for item in "$dir"/*; do
    # In an empty directory the glob stays unexpanded; skip the literal
    # pattern (and anything else that does not exist).
    [[ -e "$item" ]] || continue
    name=${item##*/}
    # Skip hidden files and directories
    if [[ "$name" == .* ]]; then
      continue
    fi
    if [[ -d "$item" ]]; then
      process_directory "$item"
    elif [[ -f "$item" ]]; then
      # Never feed the output file back into itself when it lives inside
      # the tree being processed.
      if [[ "$item" -ef "$outputfile" ]]; then
        continue
      fi
      if is_excluded_ext "$item"; then
        continue
      fi
      if is_text_file "$item"; then
        echo "File '$item' is being processed"
        # One redirection for the whole record: header, blank line,
        # contents, blank line.
        {
          echo "--------------------------------------------"
          echo "File: $item"
          echo "--------------------------------------------"
          echo
          cat -- "$item"
          echo
        } >> "$outputfile"
      else
        echo "Skipping binary file: $item"
      fi
    fi
  done
}
# Start from an empty output file. Stop right away if it cannot be created
# or truncated, because every later append would fail as well.
if ! true > "$outputfile"; then
  echo "Error: Cannot write to output file '$outputfile'" >&2
  exit 1
fi
# Walk the source tree and report where the result was written.
process_directory "$sourcedir"
echo "All text files have been concatenated into '$outputfile'"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment