Skip to content

Instantly share code, notes, and snippets.

@jwilk
Created October 23, 2021 07:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jwilk/c7b23a11473776088f9a3ef12573f476 to your computer and use it in GitHub Desktop.
Save jwilk/c7b23a11473776088f9a3ef12573f476 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
#
# Copyright (c) 2008 Sandro Tosi <morph@debian.org>
# Copyright (c) 2009-2010 Luca Falavigna <dktrkranz@debian.org>
# License: Public Domain
#
#
# This tool is written to help Debian Python Modules/Apps Teams
# packagers identify what debian packages are needed to execute the
# tool being packaged.
#
# It achieves this by identifying the import statements and then
# searching the installed packages for the one that contains the
# Python file of each module.
#
# NOTE: the biggest weakness is that it can identify modules only for
# installed packages; to help you still identify the needed modules,
# it prints the list of modules that could not be verified.
#
# TODO:
# * add support for __import__ function (even if it's done only on
# some rare cases), it's just another check while parsing AST
# * it's not PEP8 compliant (pardon me...)
# * better formatting of output:
# - instead of print <module>: (<deb pkg>, # of occurrence) it
# might be better something like <deb pkg>: ((<mod1>,<mod2>,...)
# sum of # of occurrence)
# * possible package suggestions for unidentified modules, like check
# if exists a debpkg name "python-<module>"
# * fix all the other tons of bugs I made here and there :)
# http://docs.python.org/library/compiler.html
# deprecated in 2.6, removed in 3.0, but we are still on 2.5...
import compiler
# used to type-check the nodes of the AST
from compiler.ast import Import, From
import sys
import glob
import os
import dircache
import stat
import subprocess
import re
# Matches a first line that runs Python directly or through env(1), with an
# optional X.Y version suffix, e.g. "#!/usr/bin/python" or
# "#!/usr/bin/env python2.5".  Written as a raw string so that the regex
# escapes (\d, \.) are not processed as string escapes first.
python_shebang = re.compile(r'#!/usr/bin/(env |)?python(\d\.\d|)?$')
class ImportNotFound(Exception):
    """Signals that a module could not be imported on this machine."""
class ImportParseError(Exception):
    """Signals that information about a module could not be retrieved."""
def convert_import_to_debian_pkg(imp):
    """Return the name of the Debian package that ships module ``imp``.

    The module is imported to discover its source file, then ``dpkg -S`` is
    asked which installed package owns that file.

    Parameters:
        imp -- module name as written in an import statement; dotted names
               such as "os.path" are accepted.

    Returns:
        The package name reported by dpkg, as a string.

    Raises:
        ImportNotFound   -- the module cannot be imported, or dpkg reports
                            an error when asked for the owner of its file.
        ImportParseError -- any other failure, e.g. the module has no
                            __file__ attribute or dpkg cannot be executed.
    """
    try:
        # __import__("a.b") returns the top-level package "a", so the module
        # that was actually requested is fetched from sys.modules instead.
        __import__(imp)
        path = sys.modules[imp].__file__
        # dpkg is queried for the source file, not the byte-compiled one;
        # only a trailing extension is rewritten
        if path.endswith('.pyc') or path.endswith('.pyo'):
            path = path[:-1]
        # lstat doesn't follow symlinks, so its mode tells whether path is one
        if stat.S_ISLNK(os.lstat(path)[stat.ST_MODE]):
            # readlink may return a target relative to the link's directory;
            # join() leaves an absolute target untouched
            path = os.path.normpath(
                os.path.join(os.path.dirname(path), os.readlink(path)))
        # argument list instead of a shell command string: the path is never
        # interpreted by a shell, whatever characters it contains
        proc = subprocess.Popen(['dpkg', '-S', path],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, err = proc.communicate()
        if err or proc.returncode != 0:
            raise ImportNotFound()
        # the pipe yields bytes on Python 3 and str on Python 2
        if not isinstance(out, str):
            out = out.decode('utf-8', 'replace')
        # keep what precedes the first colon of every output line and
        # concatenate, as the former "awk -F: '{ print $1 }'" pipeline did
        return ''.join([line.split(':', 1)[0] for line in out.splitlines()])
    except ImportNotFound:
        # must not be downgraded to ImportParseError by the generic handler
        raise
    except ImportError:
        # module not found; sys.exc_info() is used instead of a bound
        # exception name so this parses on every Python version
        e = sys.exc_info()[1]
        print("E: ImportError while checking %s; exception: %s" % (imp, str(e)))
        raise ImportNotFound()
    except Exception:
        # __file__ attribute doesn't exist or any other error
        e = sys.exc_info()[1]
        print("E: error while checking %s; exception: %s" % (imp, str(e)))
        raise ImportParseError()
def find_py_files_in_dir(path):
    """Yield the files directly inside ``path`` that carry a "py" extension.

    The extension is matched in any letter case (".py", ".PY", ".Py", ...);
    subdirectories are not visited.  Based on recipe 2.19 from the Python
    Cookbook.
    """
    any_case_py = os.path.join(path, "*.[Pp][Yy]")
    for py_file in glob.glob(any_case_py):
        yield py_file
def find_py_shebang_in_dir(path):
    """Yield the regular files directly inside ``path`` that start with a Python shebang.

    Every entry matched by the "*" glob is considered, whatever its
    extension; its first line is tested against ``python_shebang``.  Files
    that cannot be opened or read are silently skipped.
    """
    for candidate in glob.glob(os.path.join(path, "*")):
        if not os.path.isfile(candidate):
            continue
        try:
            f = open(candidate)
            try:
                first_line = f.readline()
            finally:
                # close the handle even when reading fails
                f.close()
        except (IOError, OSError, UnicodeError):
            # unreadable file, or (where open() decodes text) a binary file
            # whose first line is not valid text: it is not a script
            continue
        if python_shebang.match(first_line):
            yield candidate
def find_py_files_in_dir_recursive(path):
    """Yield the Python files found in ``path`` and, recursively, in its subdirectories.

    A file qualifies when it has a "py" extension (find_py_files_in_dir) or
    starts with a Python shebang (find_py_shebang_in_dir).  A file matching
    both criteria is yielded only once, so it is not processed twice by the
    caller.  Subdirectories are visited in sorted order.
    """
    # check first in the dir passed as parameter
    already_yielded = set()
    for finder in (find_py_files_in_dir, find_py_shebang_in_dir):
        for match in finder(path):
            if match in already_yielded:
                continue
            already_yielded.add(match)
            yield match
    # sorted(os.listdir()) gives the same ordered listing dircache.listdir()
    # did, without depending on a module that was removed in Python 3;
    # listdir strips the directory from the names, so join it back
    for item in sorted(os.listdir(path)):
        subpath = os.path.join(path, item)
        # if it's a dir, then go recursive on it
        if os.path.isdir(subpath):
            # yield every item found in the recursive call!
            for subfile in find_py_files_in_dir_recursive(subpath):
                yield subfile
def parse_file_import(data):
    """Count the imports found in the syntax tree rooted at ``data``.

    The tree is walked recursively through ``getChildren()``.  Each module
    named by an ``Import`` node, and the source module of each ``From``
    node, adds 1 to its entry in the module-level ``import_dict``.  Values
    without a ``getChildren`` method are ignored.  Nothing is returned.
    """
    try:
        children = data.getChildren()
    except AttributeError:
        # not a node that can be descended into: nothing to scan.  Only
        # this case is ignored, so that real errors (and KeyboardInterrupt)
        # are no longer swallowed.
        return
    # scan nodes...
    for child in children:
        if isinstance(child, Import):
            # "import a, b as c": one entry per imported module
            for name, _alias in child.names:
                add_value_to_dict(import_dict, name, 1)
        elif isinstance(child, From):
            # "from a import b": only the source module is recorded
            add_value_to_dict(import_dict, child.modname, 1)
        else:
            # neither From nor Import: check its children
            parse_file_import(child)
def add_value_to_dict(dict, key, value):
    """Accumulate ``value`` under ``key`` in ``dict``.

    A key that is not present yet is created holding ``value``; otherwise
    ``value`` is added in place to what is already stored.
    """
    if key not in dict:
        # first occurrence: start the entry
        dict[key] = value
        return
    dict[key] += value
# main
# import_dict is filled by parse_file_import(); the other two hold the results
import_dict = {}
mod_pkgs = {}
mod_not_found = {}


def main(argv):
    """Scan the directory named by ``argv[1]`` and print the packages its imports need.

    Every Python file found under the directory is parsed for imports, then
    each imported module is mapped to the Debian package containing it.
    Two dictionaries are pretty-printed: module -> (package, occurrences)
    and, for modules that could not be resolved, module -> (reason,
    occurrences).

    Returns the process exit status: 0 on completion, 2 when the directory
    argument is missing.
    """
    import pprint

    if len(argv) < 2:
        # fail with a usage message instead of an IndexError traceback
        sys.stderr.write("Usage: %s <directory>\n" % argv[0])
        return 2

    # main file parse loop
    for source in find_py_files_in_dir_recursive(argv[1]):
        try:
            # parses the syntax tree
            parse_file_import(compiler.parseFile(source))
        except Exception:
            # a file that cannot be parsed is reported and skipped
            e = sys.exc_info()[1]
            print("Error parsing " + source + "; exception: " + str(e))

    # loop to identify the deb pkg containing each module, or add to discards list
    for module, count in import_dict.items():
        try:
            pkg = convert_import_to_debian_pkg(module)
            mod_pkgs[module] = (pkg, count)
        except ImportNotFound:
            mod_not_found[module] = ('module not found on this machine', count)
        except ImportParseError:
            mod_not_found[module] = ('error parsing module', count)

    # temporary output printing
    pprint.pprint(mod_pkgs)
    pprint.pprint(mod_not_found)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment