Skip to content

Instantly share code, notes, and snippets.

@diramazioni
Created March 26, 2024 11:08
Show Gist options
  • Save diramazioni/1e960f0433999efe39769b205e364c50 to your computer and use it in GitHub Desktop.
Save diramazioni/1e960f0433999efe39769b205e364c50 to your computer and use it in GitHub Desktop.
List files recursively based on a JSON configuration passed as a command-line argument
import os
import fnmatch
import json
import argparse
def list_files_recursive(directory, include_patterns, exclude_dirs):
    """
    List files recursively in the given directory,
    filtering them by include patterns and excluding specified directories.

    :param directory: The base directory to search in.
    :param include_patterns: List of fnmatch-style patterns of file names to
        include. A single pattern may also be given as a plain string.
    :param exclude_dirs: List of directory names to exclude from the search.
        A single name may also be given as a plain string; None excludes nothing.
    :return: A list of file paths that match at least one include pattern and
        are not within the excluded directories. Each file is listed once,
        even when it matches several patterns.
    """
    # A bare string would otherwise be iterated character by character
    # (patterns) or turn the exclusion test into a substring check (dirs).
    if isinstance(include_patterns, str):
        include_patterns = [include_patterns]
    if isinstance(exclude_dirs, str):
        exclude_dirs = [exclude_dirs]
    excluded = set(exclude_dirs or ())

    matches = []
    for root, dirnames, filenames in os.walk(directory):
        # Prune in place so os.walk does not descend into excluded directories.
        dirnames[:] = [d for d in dirnames if d not in excluded]

        # Track names already emitted for this directory so a file matching
        # more than one pattern is reported only once.
        seen = set()
        for pattern in include_patterns:
            for filename in fnmatch.filter(filenames, pattern):
                if filename not in seen:
                    seen.add(filename)
                    matches.append(os.path.join(root, filename))
    return matches
def main():
    """
    Command-line entry point.

    Reads a single positional argument containing a JSON object with the
    search parameters, collects the matching files with
    list_files_recursive, and prints the resulting paths as a JSON array.

    Recognised keys of the JSON object:
      - "directory": base directory to search (required).
      - "includeFile": list of file name patterns to include (default []).
      - "excludeDir": list of directory names to skip (default []).
      - "batch": if > 0, output at most this many paths (default 0).
      - "start": index of the first path to output (default 0).

    :raises ValueError: if the argument is not valid JSON, is not a JSON
        object, names a directory that does not exist, or carries a
        non-integer "batch" / a non-integer or negative "start".
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(description='List files recursively based on include and exclude patterns.')
    parser.add_argument('input_json', help='''
{
"directory": "/path/to/search",
"includeFile": ["*.ts"],
"excludeDir": ["node_modules", "dist"],
"batch":10,
"start":0
}
''')
    args = parser.parse_args()

    # Convert the JSON string into a Python dictionary
    try:
        input_params = json.loads(args.input_json)
    except json.JSONDecodeError as err:
        # Chain the decoder error so the position of the syntax problem is kept.
        raise ValueError("Invalid JSON provided as input") from err
    if not isinstance(input_params, dict):
        raise ValueError("Input JSON must be an object with the search parameters")

    # Extract parameters
    search_directory = input_params.get('directory')
    include_files = input_params.get('includeFile', [])
    exclude_directories = input_params.get('excludeDir', [])
    batch = input_params.get('batch', 0)
    start = input_params.get('start', 0)

    # Validate directory; a missing or non-string value must not reach
    # os.path.isdir, which raises TypeError for None.
    if not isinstance(search_directory, str) or not os.path.isdir(search_directory):
        raise ValueError(f"The specified directory does not exist: {search_directory}")

    # Validate the slicing parameters up front instead of failing later with
    # an unrelated TypeError from the comparison or the slice.
    if not isinstance(batch, int):
        raise ValueError(f'"batch" must be an integer: {batch!r}')
    if not isinstance(start, int) or start < 0:
        raise ValueError(f'"start" must be a non-negative integer: {start!r}')

    # Get the list of files
    files = list_files_recursive(search_directory, include_files, exclude_directories)

    # Apply the start offset always; limit the length only when batch > 0.
    stop = start + batch if batch > 0 else None
    files = files[start:stop]

    # Output the list of files
    print(json.dumps(files))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment