Skip to content

Instantly share code, notes, and snippets.

@AIFahim
Last active February 10, 2025 05:30
Show Gist options
  • Save AIFahim/24d756a188b4aea93d53ccec8980338f to your computer and use it in GitHub Desktop.
Save AIFahim/24d756a188b4aea93d53ccec8980338f to your computer and use it in GitHub Desktop.
Import collector: gathers the import statements used across a Python codebase
import os
import ast
from typing import Set, List, Dict, Tuple
from pathlib import Path
from collections import defaultdict
def extract_imports_ast(file_path: str) -> Set[Tuple[str, str]]:
    """
    Extract import statements from a Python source file using AST parsing.

    Every ``import`` / ``from ... import`` node in the file is visited,
    including those nested inside functions, classes or conditionals.

    Args:
        file_path: Path of the Python file to parse (read as UTF-8).

    Returns:
        A set of ``(import_statement, line_number)`` tuples; the line number
        is rendered as a string. ``import a, b`` yields one entry per module,
        while ``from m import a, b`` yields a single entry. Relative imports
        keep their leading dots (``from ..pkg import x``). ``as`` aliases are
        not part of the statement text.

        If the file cannot be read or parsed, the error is printed and the
        imports collected so far (normally none) are returned.
    """
    imports = set()
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            tree = ast.parse(file.read())
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for name in node.names:
                    imports.add((f"import {name.name}", str(node.lineno)))
            elif isinstance(node, ast.ImportFrom):
                # node.level is the number of leading dots of a relative
                # import (0 for absolute); node.module is None for
                # "from . import x". Without the dots, "from ..pkg import x"
                # would be reported as the absolute "from pkg import x".
                module = '.' * (node.level or 0) + (node.module or '')
                names = ', '.join(name.name for name in node.names)
                imports.add((f"from {module} import {names}", str(node.lineno)))
    except Exception as e:
        # Deliberately best-effort: one unreadable or syntactically invalid
        # file must not abort a scan of the whole tree.
        print(f"Error processing {file_path}: {str(e)}")
    return imports
def find_python_files(directory: str) -> List[str]:
    """
    Walk ``directory`` recursively and collect every ``*.py`` file.

    Args:
        directory: Root of the tree to scan.

    Returns:
        Paths (each ``os.path.join`` of the walked folder and the file name)
        of all files whose name ends with ``.py``, in ``os.walk`` order.
    """
    matches = []
    for current_dir, _subdirs, filenames in os.walk(directory):
        matches.extend(
            os.path.join(current_dir, filename)
            for filename in filenames
            if filename.endswith('.py')
        )
    return matches
def ensure_valid_output_path(output_file: str) -> str:
    """
    Resolve the output file path and make sure its parent directory exists.

    Args:
        output_file: Requested output location; may be relative, and may
            name an existing directory.

    Returns:
        An absolute file path. If ``output_file`` is an existing directory,
        ``imports.txt`` inside it is used. If the parent directory cannot be
        created, a message is printed and ``imports.txt`` in the current
        working directory is returned instead.
    """
    output_file = os.path.abspath(output_file)
    if os.path.isdir(output_file):
        output_file = os.path.join(output_file, 'imports.txt')
    output_dir = os.path.dirname(output_file)
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError:
        # OSError (not just PermissionError): creation also fails when a
        # path component is a regular file, on read-only filesystems, etc.
        # All of those should use the same fallback rather than crash.
        print(f"Cannot write to {output_dir}")
        output_file = os.path.join(os.getcwd(), 'imports.txt')
        print(f"Using current directory instead: {output_file}")
    return output_file
def _usage_by_import(files_by_import, directory):
    """Return ``[(import_stmt, [(rel_path, line_num), ...]), ...]`` in report order."""
    usage = []
    for import_stmt in sorted(files_by_import):
        # Line numbers are stored as strings; compare them as ints so that
        # line 2 is listed before line 10 within the same file.
        locations = sorted(
            files_by_import[import_stmt],
            key=lambda loc: (loc[0], int(loc[1])),
        )
        usage.append((
            import_stmt,
            [(os.path.relpath(path, directory), line_num)
             for path, line_num in locations],
        ))
    return usage


def _write_full_report(f, imports_by_file, usage, directory, file_count):
    """Write the three-section report (by file, by import, summary) to ``f``."""
    rule = "=" * 80
    f.write("# Collected Import Statements\n\n")
    # First section: imports grouped by file
    f.write(rule + "\n")
    f.write("IMPORTS BY FILE\n")
    f.write(rule + "\n\n")
    for file_path in sorted(imports_by_file):
        rel_path = os.path.relpath(file_path, directory)
        f.write(f"\n## {rel_path}\n")
        # Numeric line order; the statement text breaks ties so that several
        # imports on one line come out in the same order on every run.
        ordered = sorted(
            imports_by_file[file_path],
            key=lambda item: (int(item[1]), item[0]),
        )
        for import_stmt, line_num in ordered:
            f.write(f"Line {line_num}: {import_stmt}\n")
    # Second section: where each import statement is used
    f.write("\n\n" + rule + "\n")
    f.write("IMPORT STATEMENT USAGE\n")
    f.write(rule + "\n\n")
    for import_stmt, locations in usage:
        f.write(f"\n{import_stmt}\n")
        f.write("-" * len(import_stmt) + "\n")
        for rel_path, line_num in locations:
            f.write(f" - {rel_path} (line {line_num})\n")
    # Summary statistics
    f.write("\n\n" + rule + "\n")
    f.write("SUMMARY\n")
    f.write(rule + "\n\n")
    f.write(f"Total Python files processed: {file_count}\n")
    f.write(f"Total unique import statements: {len(usage)}\n")


def _write_usage_only(f, usage):
    """Write the reduced fallback report (usage listing only) to ``f``."""
    f.write("# Collected Import Statements\n\n")
    for import_stmt, locations in usage:
        f.write(f"\n{import_stmt}\n")
        for rel_path, line_num in locations:
            f.write(f" - {rel_path} (line {line_num})\n")


def collect_imports(directory: str, output_file: str) -> None:
    """
    Collect imports from all Python files under ``directory`` and save a report.

    Progress is printed per file. The report lists imports per file, the
    files using each import statement, and summary counts.

    Args:
        directory: Root directory to scan; report paths are shown relative
            to it.
        output_file: Desired report location (validated through
            ``ensure_valid_output_path``).

    Returns:
        None. Returns early, after printing a message, when no Python files
        or no import statements are found. If the report cannot be written
        because of a ``PermissionError``, a reduced report is written to
        ``imports.txt`` in the current working directory; if that fails too,
        the findings are printed to stdout.
    """
    python_files = find_python_files(directory)
    if not python_files:
        print(f"No Python files found in {directory}")
        return

    # Imports found in each file: file path -> {(statement, line)}
    imports_by_file = defaultdict(set)
    # Files using each import: statement -> {(file path, line)}
    files_by_import = defaultdict(set)
    total_files = len(python_files)
    for index, file_path in enumerate(python_files, start=1):
        rel_path = os.path.relpath(file_path, directory)
        print(f"Processing [{index}/{total_files}]: {rel_path}")
        for import_stmt, line_num in extract_imports_ast(file_path):
            imports_by_file[file_path].add((import_stmt, line_num))
            files_by_import[import_stmt].add((file_path, line_num))

    if not files_by_import:
        print("No import statements found.")
        return

    output_file = ensure_valid_output_path(output_file)
    # Built once and shared by the report, the fallback file and the
    # console dump so all three list entries in the same order.
    usage = _usage_by_import(files_by_import, directory)
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            _write_full_report(f, imports_by_file, usage, directory, total_files)
    except PermissionError:
        print(f"\nError: Cannot write to {output_file}")
        fallback_file = os.path.join(os.getcwd(), 'imports.txt')
        try:
            with open(fallback_file, 'w', encoding='utf-8') as f:
                _write_usage_only(f, usage)
        except PermissionError:
            # Last resort: nothing is writable, so show the results instead.
            print("Error: Cannot write to current directory either.")
            print("Import statements found:")
            for import_stmt, locations in usage:
                print(f"\n{import_stmt}")
                for rel_path, line_num in locations:
                    print(f" - {rel_path} (line {line_num})")
        else:
            print(f"Results saved to fallback location: {fallback_file}")
    else:
        print("\nAnalysis complete!")
        print(f"Processed {total_files} Python files")
        print(f"Found {len(files_by_import)} unique import statements")
        print(f"Results saved to: {output_file}")
if __name__ == "__main__":
    # Interactive entry point: ask for the directory to scan and the report
    # location, then run the collector.
    # Both answers are stripped so stray whitespace from typing or pasting a
    # path does not make a valid directory look missing.
    directory = input("Enter the directory path to scan: ").strip()
    output_file = input("Enter the output file path (default: imports.txt): ").strip() or "imports.txt"
    directory = os.path.abspath(directory)
    # isdir rather than exists: a path to a regular file cannot be scanned.
    if not os.path.isdir(directory):
        print(f"Error: Directory '{directory}' does not exist!")
    else:
        print(f"\nScanning directory: {directory}")
        collect_imports(directory, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment