Skip to content

Instantly share code, notes, and snippets.

@gopigof
Created May 5, 2020 18:41
Show Gist options
  • Save gopigof/e941b93c658c9593666c55a8881de4ca to your computer and use it in GitHub Desktop.
Save gopigof/e941b93c658c9593666c55a8881de4ca to your computer and use it in GitHub Desktop.
Script to crawl a project and extract used packages
import logging
import os
import ast
import traceback
import sys
from contextlib import contextmanager
# Callable that get_all_imports uses to open each source file for reading;
# bound here to the builtin ``open``.
open_func = open
# Python 2 compatibility flag, fixed to False.
# NOTE(review): no live code in the visible file reads this — only a
# commented-out stdlib filter mentions it; confirm before removing.
py2 =False
def join(f):
    """Return *f* after passing it through ``os.path.join`` on its own.

    Args:
        f (str): A path or file name.

    Returns:
        The result of ``os.path.join(f)``.
    """
    # An earlier variant anchored ``f`` to this module's directory via
    # ``os.path.dirname(__file__)``; that behaviour is disabled.
    joined = os.path.join(f)
    return joined
@contextmanager
def _open(filename=None, mode='r'):
"""Open a file or ``sys.stdout`` depending on the provided filename.
Args:
filename (str): The path to the file that should be opened. If
``None`` or ``'-'``, ``sys.stdout`` or ``sys.stdin`` is
returned depending on the desired mode. Defaults to ``None``.
mode (str): The mode that should be used to open the file.
Yields:
A file handle.
"""
if not filename or filename == '-':
if not mode or 'r' in mode:
file = sys.stdin
elif 'w' in mode:
file = sys.stdout
else:
raise ValueError('Invalid mode for file: {}'.format(mode))
else:
file = open(filename, mode)
try:
yield file
finally:
if file not in (sys.stdin, sys.stdout):
file.close()
def get_all_imports(
        path, encoding=None, extra_ignore_dirs=None, follow_links=True,
        ignore_errors=False):
    """Collect the top-level names imported by the ``.py`` files under *path*.

    The tree rooted at *path* is walked, every ``.py`` file is parsed with
    ``ast`` and the first dotted component of each absolute import is
    recorded. Names that match a directory or module found during the walk
    are treated as project-local and removed. Standard-library modules are
    NOT filtered out of the result.

    Args:
        path (str): Root directory to crawl.
        encoding (str): Encoding passed to ``open_func`` when reading each
            file. Defaults to ``None``.
        extra_ignore_dirs (list): Additional directories to skip; only the
            base name of each (after ``os.path.realpath``) is compared.
        follow_links (bool): Passed to ``os.walk`` as ``followlinks``.
        ignore_errors (bool): When true, a file that fails to parse is
            logged and skipped instead of aborting the crawl. Errors raised
            while opening or reading a file are not covered.

    Returns:
        set: Top-level imported names that are not project-local.

    Raises:
        Exception: Whatever ``ast.parse`` raised (typically ``SyntaxError``)
            for the first unparsable file, unless *ignore_errors* is true.
    """
    raw_imports = set()
    candidates = []
    ignore_dirs = {".hg", ".svn", ".git", ".tox", "__pycache__", "env", "venv"}
    if extra_ignore_dirs:
        ignore_dirs.update(
            os.path.basename(os.path.realpath(e)) for e in extra_ignore_dirs)

    for root, dirs, files in os.walk(path, followlinks=follow_links):
        # Prune in place so os.walk does not descend into ignored dirs.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]

        # Directory and module names are remembered so that imports of the
        # project's own code can be discarded at the end.
        candidates.append(os.path.basename(root))
        py_files = [fn for fn in files if os.path.splitext(fn)[1] == ".py"]
        candidates += [os.path.splitext(fn)[0] for fn in py_files]

        for fn in py_files:
            file_name = os.path.join(root, fn)
            with open_func(file_name, "r", encoding=encoding) as f:
                contents = f.read()
            try:
                tree = ast.parse(contents)
            except Exception:
                if ignore_errors:
                    traceback.print_exc()
                    logging.warning("Failed on file: %s", file_name)
                    continue
                logging.error("Failed on file: %s", file_name)
                raise

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for subnode in node.names:
                        raw_imports.add(subnode.name)
                elif isinstance(node, ast.ImportFrom):
                    # Relative imports (``from . import x``,
                    # ``from .mod import y``) always refer to project code
                    # and may carry ``module=None``; skip them.
                    if node.level == 0 and node.module:
                        raw_imports.add(node.module)

    # Only the first component matters: ``django.conf`` -> ``django``.
    imports = {name.partition('.')[0] for name in raw_imports}

    packages = imports - set(candidates)
    logging.debug('Found packages: %s', packages)
    return packages
# with open(join("stdlib"), "r") as f:
# data = {x.strip() for x in f}
#
# data = {x for x in data if x not in py2_exclude} if py2 else data
# return list(packages - data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment