Skip to content

Instantly share code, notes, and snippets.

@verhovsky
Last active February 7, 2021 13:04
Show Gist options
  • Save verhovsky/52e6c344a952d467b265c28acc905f56 to your computer and use it in GitHub Desktop.
Save verhovsky/52e6c344a952d467b265c28acc905f56 to your computer and use it in GitHub Desktop.
Which modules are imported in the Python standard library most often?
#!/usr/bin/env python3
# This script scans all .py files in cpython/Lib, finds all import statements
# in them and counts out what is imported most often in each top directory
# or file in Lib/ (meaning if some library is imported many times in various
# files in e.g. Lib/http/ , it will only be counted once).
#
# Clone the CPython repo then put this file in the top level cpython/
# directory and run it:
#
# cd /tmp
# git clone https://github.com/python/cpython/
# cd cpython
# # <download find_imports.py to this directory>
# python3 find_imports.py
from pathlib import Path
import ast
import itertools
from operator import itemgetter
import json
from collections import Counter
CPYTHON_LIB = Path("Lib")


def _imported_top_level_names(source):
    """Return the set of top-level names that *source* imports absolutely.

    *source* may be ``str`` or ``bytes``; it is handed to ``ast.parse``
    unchanged. Dotted names are reduced to their first component, so
    "collections.abc" is counted as "collections". Relative imports
    (``from . import x``) are ignored.
    """
    names = set()
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            names.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.level == 0:
            # level == 0 means an absolute import, so node.module is set.
            names.add(node.module.split(".")[0])
    return names


def collect_imports(lib_root):
    """Map each top-level library under *lib_root* to the names it imports.

    A "library" is a top-level directory or .py file directly inside
    *lib_root*; every file below a directory contributes to that directory's
    entry. Files inside a "test" or "tests" directory, and files whose name
    starts with "test_", are skipped.

    Returns a dict of library name -> sorted list of imported top-level
    names, excluding the library itself and "__main__".
    """
    per_lib = {}
    for path in lib_root.rglob("*.py"):
        # Judge the path relative to the library root so that the location
        # of the checkout (e.g. a parent directory called "test") cannot
        # cause files to be skipped.
        rel = path.relative_to(lib_root)
        if (
            "test" in rel.parts
            or "tests" in rel.parts
            or rel.name.startswith("test_")
        ):
            continue
        lib = rel.parts[0].removesuffix(".py")
        # Parse the raw bytes: ast.parse then honours the file's own
        # encoding declaration instead of relying on the locale's default
        # text encoding.
        names = _imported_top_level_names(path.read_bytes())
        # If the library is a directory with multiple files, the entry
        # might already contain something.
        per_lib.setdefault(lib, set()).update(names)
    # Don't count self imports
    return {
        lib: sorted(names - {lib, "__main__"})
        for lib, names in per_lib.items()
    }


def count_imports(lib_root):
    """Return a dict of module name -> number of libraries importing it.

    Each library under *lib_root* counts at most once per imported module,
    no matter how many of its files import that module.
    """
    imported = collect_imports(lib_root)
    return dict(Counter(itertools.chain.from_iterable(imported.values())))


def main():
    """Print every imported module and its count, least imported first."""
    # For debugging, the per-library mapping can be dumped with:
    # print(json.dumps(collect_imports(CPYTHON_LIB), indent=4))
    most_imported = count_imports(CPYTHON_LIB)
    # Sort by count, then alphabetically by module name.
    for lib, count in sorted(most_imported.items(), key=itemgetter(1, 0)):
        print(lib, count)


if __name__ == "__main__":
    main()
Mailman 1
_abc 1
_ast 1
_asyncio 1
_bisect 1
_blake2 1
_bootsubprocess 1
_bz2 1
_codecs 1
_codecs_cn 1
_codecs_hk 1
_codecs_iso2022 1
_codecs_jp 1
_codecs_kr 1
_codecs_tw 1
_compat_pickle 1
_contextvars 1
_crypt 1
_csv 1
_ctypes 1
_curses 1
_curses_panel 1
_datetime 1
_dbm 1
_decimal 1
_elementtree 1
_functools 1
_gdbm 1
_heapq 1
_json 1
_lsprof 1
_lzma 1
_markupbase 1
_md5 1
_msi 1
_multibytecodec 1
_multiprocessing 1
_opcode 1
_overlapped 1
_pickle 1
_posixshmem 1
_py_abc 1
_pydecimal 1
_queue 1
_random 1
_scproxy 1
_sha1 1
_sha256 1
_sha3 1
_signal 1
_sitebuiltins 1
_socket 1
_sqlite3 1
_ssl 1
_stat 1
_statistics 1
_string 1
_strptime 1
_struct 1
_symtable 1
_threading_local 1
_tracemalloc 1
_uuid 1
_winreg 1
_zoneinfo 1
aifc 1
asynchat 1
asyncio 1
cgi 1
codeop 1
csv 1
dataclasses 1
dbm 1
distutils 1
docutils 1
filecmp 1
fractions 1
ftplib 1
idlelib 1
imghdr 1
json 1
lib2to3 1
mmap 1
msilib 1
nturl2path 1
opcode 1
pdb 1
pep517 1
pgen2 1
pip 1
profile 1
pyclbr 1
pydoc_data 1
pyexpat 1
rlcompleter 1
secrets 1
site 1
sitecustomize 1
sndhdr 1
sre_compile 1
statistics 1
stringprep 1
tabnanny 1
tracemalloc 1
turtle 1
usercustomize 1
uu 1
vms_lib 1
wave 1
win32api 1
win32con 1
win32evtlog 1
win32evtlogutil 1
xmlrpc 1
zipimport 1
_aix_support 2
_collections 2
_frozen_importlib 2
_frozen_importlib_external 2
_hashlib 2
_locale 2
_operator 2
_osx_support 2
_posixsubprocess 2
_sha512 2
_sre 2
_tkinter 2
array 2
asyncore 2
bdb 2
chunk 2
cmd 2
contextvars 2
ctypes 2
encodings 2
genericpath 2
java 2
mimetypes 2
multiprocessing 2
netrc 2
ntpath 2
plistlib 2
pstats 2
quopri 2
smtplib 2
sre_parse 2
tarfile 2
termios 2
test 2
tty 2
unittest 2
xml 2
__future__ 3
_compression 3
_warnings 3
audioop 3
code 3
concurrent 3
decimal 3
difflib 3
gc 3
getpass 3
gettext 3
gzip 3
lzma 3
numbers 3
org 3
pkgutil 3
posix 3
py_compile 3
sre_constants 3
tkinter 3
token 3
_weakref 4
_weakrefset 4
_winapi 4
configparser 4
dis 4
doctest 4
fcntl 4
hmac 4
html 4
optparse 4
pathlib 4
platform 4
queue 4
socketserver 4
unicodedata 4
webbrowser 4
zipfile 4
_imp 5
_io 5
ast 5
bisect 5
bz2 5
copyreg 5
grp 5
heapq 5
http 5
keyword 5
msvcrt 5
pydoc 5
runpy 5
shlex 5
signal 5
textwrap 5
typing 5
winreg 5
atexit 6
glob 6
nt 6
reprlib 6
selectors 6
sysconfig 6
calendar 7
hashlib 7
pprint 7
readline 7
zlib 7
_collections_abc 8
locale 8
logging 8
marshal 8
random 8
select 8
ssl 8
codecs 9
pickle 9
_thread 10
base64 10
math 10
posixpath 10
binascii 11
email 11
fnmatch 11
pwd 11
tokenize 11
datetime 12
linecache 12
operator 12
shutil 12
string 12
urllib 12
weakref 12
enum 13
abc 14
getopt 15
inspect 15
tempfile 15
importlib 16
stat 16
subprocess 16
threading 16
contextlib 17
copy 17
errno 20
socket 21
argparse 22
struct 22
traceback 22
builtins 24
itertools 30
functools 35
types 35
time 40
warnings 50
collections 51
io 55
re 67
os 101
sys 128
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment