Created
May 5, 2020 18:41
-
-
Save gopigof/e941b93c658c9593666c55a8881de4ca to your computer and use it in GitHub Desktop.
Script to crawl a project and extract used packages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import os | |
import ast | |
import traceback | |
import sys | |
from contextlib import contextmanager | |
# Alias for the file-opening callable; get_all_imports() opens source files
# through this name rather than calling the builtin directly.
open_func = open

# Python 2 compatibility flag. It is only mentioned by the commented-out
# standard-library filtering code; no active code in this file reads it.
py2 = False
def join(f):
    """Return ``f`` passed through ``os.path.join``.

    With a single argument ``os.path.join`` hands the path back unchanged, so
    ``f`` is resolved relative to the current working directory by whoever
    opens it.

    Args:
        f (str): A file name or path.

    Returns:
        str: The same path.
    """
    # Earlier variant, resolving ``f`` next to this module instead:
    # return os.path.join(os.path.dirname(__file__), f)
    return os.path.join(f)
@contextmanager
def _open(filename=None, mode='r'):
    """Open a file or a standard stream depending on the provided filename.

    Args:
        filename (str): The path to the file that should be opened. If
            ``None`` or ``'-'``, ``sys.stdout`` or ``sys.stdin`` is
            returned depending on the desired mode. Defaults to ``None``.
        mode (str): The mode that should be used to open the file.

    Yields:
        A file handle.

    Raises:
        ValueError: If no real filename is given and ``mode`` contains
            neither ``'r'`` nor ``'w'``.
    """
    use_std_stream = not filename or filename == '-'
    if use_std_stream:
        if not mode or 'r' in mode:
            handle = sys.stdin
        elif 'w' in mode:
            handle = sys.stdout
        else:
            raise ValueError('Invalid mode for file: {}'.format(mode))
    else:
        handle = open(filename, mode)

    try:
        yield handle
    finally:
        # Decide from what was chosen at entry, not from the current values
        # of sys.stdin/sys.stdout: those may have been rebound while the
        # body ran, and a standard stream must never be closed here.
        if not use_std_stream:
            handle.close()
def get_all_imports(
        path, encoding=None, extra_ignore_dirs=None, follow_links=True,
        ignore_errors=False):
    """Collect the top-level names imported by the ``.py`` files under ``path``.

    The directory tree is walked, every ``.py`` file is parsed with ``ast``
    and the first dotted component of each imported module is recorded.
    Names that match a visited directory or a ``.py`` file found during the
    walk are treated as local modules and removed from the result.

    Args:
        path (str): Root directory to crawl.
        encoding (str): Encoding used to read the source files. ``None``
            uses the default of the file-opening callable.
        extra_ignore_dirs (list): Additional directories to skip. Only the
            base name of each (after ``os.path.realpath``) is compared.
        follow_links (bool): Passed to ``os.walk`` as ``followlinks``.
        ignore_errors (bool): When true, a file that fails to parse is
            logged (with its traceback printed) and skipped. When false,
            the default, the error is logged and re-raised.

    Returns:
        set: Top-level imported names that are not local modules.

    Raises:
        Exception: Whatever ``ast.parse`` raises for an unparsable file
            (typically ``SyntaxError``) when ``ignore_errors`` is false.
    """
    ignore_dirs = {".hg", ".svn", ".git", ".tox", "__pycache__", "env", "venv"}
    if extra_ignore_dirs:
        ignore_dirs.update(
            os.path.basename(os.path.realpath(d)) for d in extra_ignore_dirs)

    raw_imports = set()
    candidates = set()  # names of local directories and modules

    for root, dirs, files in os.walk(path, followlinks=follow_links):
        # Prune in place so os.walk does not descend into ignored dirs.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]

        candidates.add(os.path.basename(root))
        py_files = [fn for fn in files if os.path.splitext(fn)[1] == ".py"]
        candidates.update(os.path.splitext(fn)[0] for fn in py_files)

        for fn in py_files:
            file_name = os.path.join(root, fn)
            with open_func(file_name, "r", encoding=encoding) as f:
                contents = f.read()

            try:
                # Passing the filename makes syntax errors point at the file.
                tree = ast.parse(contents, filename=file_name)
            except Exception:
                if ignore_errors:
                    traceback.print_exc()
                    logging.warning("Failed on file: %s", file_name)
                    continue
                logging.error("Failed on file: %s", file_name)
                raise

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    raw_imports.update(alias.name for alias in node.names)
                elif isinstance(node, ast.ImportFrom):
                    # ``module`` is None for ``from . import X``.
                    raw_imports.add(node.module)

    # Clean up imports: drop the None/empty entries and keep only the first
    # component, e.g. ``django.conf`` --> ``django``.
    imports = {name.partition('.')[0] for name in raw_imports if name}

    packages = imports - candidates
    logging.debug('Found packages: %s', packages)
    return packages

    # Disabled standard-library filtering, kept for reference:
    # with open(join("stdlib"), "r") as f:
    #     data = {x.strip() for x in f}
    #
    # data = {x for x in data if x not in py2_exclude} if py2 else data
    # return list(packages - data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment