Skip to content

Instantly share code, notes, and snippets.

@kkew3
Last active May 14, 2022 22:02
Show Gist options
  • Save kkew3/3bd1e0255af63e3ac0801c7bbc89e7bf to your computer and use it in GitHub Desktop.
Save kkew3/3bd1e0255af63e3ac0801c7bbc89e7bf to your computer and use it in GitHub Desktop.
Find all Python imports in source file(s) and/or ipython notebooks, and optionally exclude installed modules, effectively creating a requirements.txt candidate. Python 2.7/3.x compatible.
from __future__ import print_function
import argparse
import os
import ast
import sys
import pkgutil
import itertools
import logging
import json
def make_parser():
parser = argparse.ArgumentParser(
description=('List all root imports. The *root* import of '
'`import pkg1.mod1` is "pkg1".'))
parse_opts = parser.add_mutually_exclusive_group()
parse_opts.add_argument(
'-g',
'--greedy',
action='store_true',
help=('find also import statements within try block, '
'if block, while block, function definition, '
'etc.'))
parse_opts.add_argument(
'-a',
'--all',
action='store_true',
help=('first list all minimal-required root '
'imports (without `-g\'), then list '
'additionally-required root imports (with '
'`-g\'), and explain the two lists'))
parser.add_argument(
'-i',
'--include-installed',
action='store_true',
help='include installed/built-in modules/packages')
parser.add_argument(
'-T',
'--files-from',
metavar='LIST_FILE',
help=('if specified, the files to process '
'will be read one per line from '
'LIST_FILE; if specified as `-\', '
'stdin will be expected to contain '
'the files to process. Note that '
'SOURCE_FILEs, if exist, take '
'precedence (see below)'))
parser.add_argument(
'--ipynb',
action='store_true',
help=('if specified, the files ending with '
'".ipynb" in either SOURCE_FILEs or '
'LIST_FILE will be parsed as ipython '
'notebook files rather than Python '
'files'))
parser.add_argument(
'rootpkg',
metavar='ROOTPKG_DIR',
type=dir_type,
help=
('the directory of the root package. See '
'https://docs.python.org/3.7/distutils/setupscript.html#listing-whole-packages '
'about *root package*. Local packages/modules will be '
'excluded from the results. For example, if '
'there are "mod1.py" and "mod2.py", and in '
'"mod2.py" there is `import mod1`, then "mod1" '
'won\'t be listed in the result.'))
parser.add_argument(
'filenames',
metavar='SOURCE_FILE',
nargs='*',
help=('if specified one or more files, '
'only these SOURCE_FILEs will get '
'processed regardless of `-T\' '
'option; if no SOURCE_FILE is '
'specified, `-T\', if exists, is '
'processed. In both cases, the '
'final results will be joined'))
return parser
def dir_type(string):
if not os.path.isdir(string):
raise argparse.ArgumentTypeError('must be a directory')
return string
# Reference: https://stackoverflow.com/a/9049549/7881370
def yield_imports(root, greedy):
"""
Yield all absolute imports.
"""
traverse = ast.walk if greedy else ast.iter_child_nodes
for node in traverse(root):
if isinstance(node, ast.Import):
for a in node.names:
yield a.name
elif isinstance(node, ast.ImportFrom):
# if node.level > 0, the import is relative
if node.level == 0:
yield node.module
def exists_local(path, rootpkg):
"""
Returns ``True`` if the absolute import ``path`` refers to a package or
a module residing under the working directory, else ``False``.
"""
filepath = os.path.join(rootpkg, path.replace('.', os.path.sep))
# see if path is a local package
if os.path.isdir(filepath) and os.path.isfile(
os.path.join(filepath, '__init__.py')):
return True
# see if path is a local module
if os.path.isfile(filepath + '.py'):
return True
return False
def filter_local(imports_iterable, rootpkg):
"""
Remove modules and packages in the working directory, and yield root
imports.
"""
for path in imports_iterable:
if not exists_local(path, rootpkg):
yield path.split('.', 1)[0]
def filter_installed(imports_iterable):
"""
Remove modules and packages already installed, which include built-in
modules and packages and those already installed (e.g. via ``pip``).
"""
installed = set(
itertools.chain(sys.builtin_module_names,
(x[1] for x in pkgutil.iter_modules())))
for name in imports_iterable:
if name not in installed:
yield name
def collect_sources(filenames, files_from):
if filenames:
for filename in filenames:
yield filename
elif files_from == '-':
try:
for line in sys.stdin:
yield line.rstrip('\n')
except KeyboardInterrupt:
pass
elif files_from:
try:
with open(files_from) as infile:
for line in infile:
yield line.rstrip('\n')
except OSError:
logging.exception('failed to read from "{}"'.format(files_from))
def parse_python(filename):
with open(filename) as infile:
root = ast.parse(infile.read(), filename)
return root
def parse_ipynb(filename):
source = []
with open(filename) as infile:
obj = json.load(infile)
for c in obj['cells']:
if c['cell_type'] == 'code':
source.extend(map(str.rstrip, c['source']))
source = (l for l in source if not l.lstrip().startswith('%'))
source = '\n'.join(source)
root = ast.parse(source, filename)
return root
def produce_results(filenames, files_from, greedy, rootpkg, include_installed,
ipynb):
all_imports = []
for filename in collect_sources(filenames, files_from):
parse_source = (parse_ipynb if ipynb and filename.endswith('.ipynb')
else parse_python)
try:
root = parse_source(filename)
except OSError:
logging.exception('skipped')
except SyntaxError:
logging.exception('failed to parse "{}"; skipped'.format(filename))
else:
all_imports.append(yield_imports(root, greedy))
all_imports = itertools.chain.from_iterable(all_imports)
all_imports = filter_local(all_imports, rootpkg)
if not include_installed:
all_imports = filter_installed(all_imports)
all_imports = set(all_imports)
return all_imports
def main():
logging.basicConfig(format='%(levelname)s: %(message)s')
args = make_parser().parse_args()
if not args.all:
all_imports = produce_results(args.filenames, args.files_from,
args.greedy, args.rootpkg,
args.include_installed, args.ipynb)
if all_imports:
print('\n'.join(sorted(all_imports)))
else:
min_imports = produce_results(args.filenames, args.files_from,
False, args.rootpkg,
args.include_installed, args.ipynb)
max_imports = produce_results(args.filenames, args.files_from,
True, args.rootpkg,
args.include_installed, args.ipynb)
extra_imports = max_imports - min_imports
printed_min_imports = False
if min_imports:
print('# minimal imports:')
print('\n'.join(sorted(min_imports)))
printed_min_imports = True
if extra_imports:
# pretty formatting purpose
if printed_min_imports:
print()
print('# additional possible imports:')
print('\n'.join(sorted(extra_imports)))
logging.shutdown()
if __name__ == '__main__':
main()
#!/bin/bash
python pyimports.py "$@"
#!/bin/bash
python3 pyimports.py "$@"
usage: pyimports.py [-h] [-g | -a] [-i] [-T LIST_FILE] [--ipynb] ROOTPKG_DIR [SOURCE_FILE ...]

List all root imports. The *root* import of `import pkg1.mod1` is "pkg1".

positional arguments:
  ROOTPKG_DIR           the directory of the root package. See
                        https://docs.python.org/3.7/distutils/setupscript.html#listing-whole-
                        packages about *root package*. Local packages/modules will be excluded
                        from the results. For example, if there are "mod1.py" and "mod2.py", and
                        in "mod2.py" there is `import mod1`, then "mod1" won't be listed in the
                        result.
  SOURCE_FILE           if specified one or more files, only these SOURCE_FILEs will get processed
                        regardless of `-T' option; if no SOURCE_FILE is specified, `-T', if
                        exists, is processed. In both cases, the final results will be joined

optional arguments:
  -h, --help            show this help message and exit
  -g, --greedy          find also import statements within try block, if block, while block,
                        function definition, etc.
  -a, --all             first list all minimal-required root imports (without `-g'), then list
                        additionally-required root imports (with `-g'), and explain the two lists
  -i, --include-installed
                        include installed/built-in modules/packages
  -T LIST_FILE, --files-from LIST_FILE
                        if specified, the files to process will be read one per line from
                        LIST_FILE; if specified as `-', stdin will be expected to contain the
                        files to process. Note that SOURCE_FILEs, if exist, take precedence (see
                        below)
  --ipynb               if specified, the files ending with ".ipynb" in either SOURCE_FILEs or
                        LIST_FILE will be parsed as ipython notebook files rather than Python
                        files
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment