# Last active
# February 10, 2025 05:30
# -
# -
# Save AIFahim/24d756a188b4aea93d53ccec8980338f to your computer and use it in GitHub Desktop.
# Import collector: gathers the import statements used across the Python files of a codebase.
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
import ast
import os
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Set, Tuple
def extract_imports_ast(file_path: str) -> Set[Tuple[str, str]]:
    """
    Extract import statements from a Python source file using AST parsing.

    Args:
        file_path: Path of the file to parse; it is read as UTF-8.

    Returns:
        A set of ``(import_statement, line_number)`` tuples, the line number
        being a string. ``import a, b`` produces one entry per module, while
        ``from m import a, b`` produces a single combined entry. Relative
        imports keep their leading dots (``from . import x``,
        ``from ..pkg import y``). ``as`` aliases are not included in the
        statement text. If the file cannot be read or parsed, the error is
        printed and an empty set is returned.
    """
    imports = set()
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            tree = ast.parse(file.read())
    except Exception as e:
        # Deliberately broad: this is a best-effort scan, so one unreadable
        # or syntactically invalid file must not abort the whole run.
        print(f"Error processing {file_path}: {str(e)}")
        return imports

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for name in node.names:
                imports.add((f"import {name.name}", str(node.lineno)))
        elif isinstance(node, ast.ImportFrom):
            # `level` is the number of leading dots of a relative import;
            # without it `from . import x` would render as `from  import x`
            # and `from ..pkg import y` would look like an absolute import.
            dots = '.' * (node.level or 0)
            module = node.module or ''
            names = ', '.join(name.name for name in node.names)
            imports.add((f"from {dots}{module} import {names}", str(node.lineno)))
    return imports
def find_python_files(directory: str) -> List[str]:
    """
    Walk ``directory`` recursively and return the paths of all ``.py`` files.

    Each returned path is the walked folder joined with the file name, in the
    order ``os.walk`` yields them.
    """
    return [
        os.path.join(current_dir, filename)
        for current_dir, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith('.py')
    ]
def ensure_valid_output_path(output_file: str) -> str:
    """
    Resolve the output file path and make sure its parent directory exists.

    The path is made absolute. If it names an existing directory, the report
    is placed in ``imports.txt`` inside it. Missing parent directories are
    created. If they cannot be created for any OS-level reason (permission
    denied, a path component that is a regular file, read-only filesystem,
    ...), a message is printed and ``imports.txt`` in the current working
    directory is used instead.

    Note that this does not verify that the file itself can be written.

    Args:
        output_file: Requested output path (file or existing directory).

    Returns:
        The absolute path that should be used for the report.
    """
    output_file = os.path.abspath(output_file)
    if os.path.isdir(output_file):
        output_file = os.path.join(output_file, 'imports.txt')
    output_dir = os.path.dirname(output_file)
    try:
        os.makedirs(output_dir, exist_ok=True)
    except OSError:
        # PermissionError is a subclass of OSError; catching the base class
        # also covers NotADirectoryError / FileExistsError and similar
        # failures, which would otherwise crash instead of falling back.
        print(f"Cannot write to {output_dir}")
        output_file = os.path.join(os.getcwd(), 'imports.txt')
        print(f"Using current directory instead: {output_file}")
    return output_file
def _report_banner(title):
    """Return a section title framed by two 80-column '=' rules."""
    rule = "=" * 80
    return f"{rule}\n{title}\n{rule}\n\n"


def _usage_lines(files_by_import, directory, underline=False):
    """Yield the lines (without newlines) of the per-import usage listing."""
    for import_stmt in sorted(files_by_import):
        yield ""
        yield import_stmt
        if underline:
            yield "-" * len(import_stmt)
        # Line numbers are stored as strings; compare them numerically so
        # that line 9 is listed before line 10 within the same file.
        usages = sorted(
            files_by_import[import_stmt],
            key=lambda usage: (usage[0], int(usage[1])),
        )
        for file_path, line_num in usages:
            rel_path = os.path.relpath(file_path, directory)
            yield f" - {rel_path} (line {line_num})"


def _write_full_report(f, imports_by_file, files_by_import, directory, total_files):
    """Write the three-section report (by file, by import, summary) to ``f``."""
    f.write("# Collected Import Statements\n\n")

    # First section: Imports by File
    f.write(_report_banner("IMPORTS BY FILE"))
    for file_path in sorted(imports_by_file):
        rel_path = os.path.relpath(file_path, directory)
        f.write(f"\n## {rel_path}\n")
        # Numeric line order; the statement text breaks ties so the output
        # does not depend on set iteration order.
        ordered = sorted(
            imports_by_file[file_path],
            key=lambda item: (int(item[1]), item[0]),
        )
        for import_stmt, line_num in ordered:
            f.write(f"Line {line_num}: {import_stmt}\n")

    # Second section: Usage of Each Import
    f.write("\n\n" + _report_banner("IMPORT STATEMENT USAGE"))
    f.writelines(
        line + "\n"
        for line in _usage_lines(files_by_import, directory, underline=True)
    )

    # Summary statistics
    f.write("\n\n" + _report_banner("SUMMARY"))
    f.write(f"Total Python files processed: {total_files}\n")
    f.write(f"Total unique import statements: {len(files_by_import)}\n")


def _save_fallback_report(files_by_import, directory):
    """Write a reduced report to ./imports.txt, or print it if that fails too."""
    try:
        fallback_file = os.path.join(os.getcwd(), 'imports.txt')
        with open(fallback_file, 'w', encoding='utf-8') as f:
            f.write("# Collected Import Statements\n\n")
            f.writelines(
                line + "\n" for line in _usage_lines(files_by_import, directory)
            )
    except OSError:
        print("Error: Cannot write to current directory either.")
        print("Import statements found:")
        for line in _usage_lines(files_by_import, directory):
            print(line)
    else:
        print(f"Results saved to fallback location: {fallback_file}")


def collect_imports(directory: str, output_file: str) -> None:
    """
    Collect imports from all Python files under ``directory`` and save a report.

    Progress is printed per file. The report lists the imports of each file,
    the files using each import statement, and summary counts. If the report
    cannot be written to ``output_file``, a reduced report is written to
    ``imports.txt`` in the current working directory; if that fails as well,
    the usage listing is printed to standard output.

    Args:
        directory: Root directory to scan; also the base for the relative
            paths shown in the report.
        output_file: Requested report path; it is resolved with
            ``ensure_valid_output_path`` before writing.

    Returns:
        None. Returns early (after printing a message) when no Python files
        or no import statements are found.
    """
    python_files = find_python_files(directory)
    if not python_files:
        print(f"No Python files found in {directory}")
        return

    # Imports found in each file (only files with at least one import).
    imports_by_file = defaultdict(set)
    # Files (and line numbers) in which each import statement appears.
    files_by_import = defaultdict(set)

    total_files = len(python_files)
    for processed_files, file_path in enumerate(python_files, start=1):
        rel_path = os.path.relpath(file_path, directory)
        print(f"Processing [{processed_files}/{total_files}]: {rel_path}")
        for import_stmt, line_num in extract_imports_ast(file_path):
            imports_by_file[file_path].add((import_stmt, line_num))
            files_by_import[import_stmt].add((file_path, line_num))

    if not files_by_import:
        print("No import statements found.")
        return

    output_file = ensure_valid_output_path(output_file)
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            _write_full_report(
                f, imports_by_file, files_by_import, directory, total_files
            )
    except OSError:
        # OSError (not just PermissionError) so that e.g. a read-only
        # filesystem also triggers the fallback instead of a traceback.
        print(f"\nError: Cannot write to {output_file}")
        _save_fallback_report(files_by_import, directory)
    else:
        print("\nAnalysis complete!")
        print(f"Processed {total_files} Python files")
        print(f"Found {len(files_by_import)} unique import statements")
        print(f"Results saved to: {output_file}")
if __name__ == "__main__":
    # Interactive entry point: ask for the directory to scan and the report
    # path, then run the collection.
    # Strip the directory input as well: a trailing space or newline from a
    # pasted path would otherwise make a valid directory look missing.
    # An empty answer resolves to the current working directory.
    directory = input("Enter the directory path to scan: ").strip()
    output_file = input("Enter the output file path (default: imports.txt): ").strip() or "imports.txt"
    directory = os.path.abspath(directory)
    if not os.path.exists(directory):
        print(f"Error: Directory '{directory}' does not exist!")
    elif not os.path.isdir(directory):
        # A regular file passes the existence check but cannot be walked;
        # report it explicitly instead of "No Python files found".
        print(f"Error: '{directory}' is not a directory!")
    else:
        print(f"\nScanning directory: {directory}")
        collect_imports(directory, output_file)
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment