Skip to content

Instantly share code, notes, and snippets.

@dorukcan
Last active July 21, 2019 12:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dorukcan/3824c779ed51be7bb9a6666c5d25c0a5 to your computer and use it in GitHub Desktop.
Save dorukcan/3824c779ed51be7bb9a6666c5d25c0a5 to your computer and use it in GitHub Desktop.
A naive "pip freeze": scans a project's .py and .ipynb files and counts the imported modules (so naive that I didn't simplify the main loop, but it has nice comments).
import os
import re
from collections import Counter
def main(start_folder=r"/Users/doruk/PycharmProjects/venus"):
    """Print how often each module is imported under ``start_folder``.

    Walks the directory tree, passes every ``.py`` / ``.ipynb`` file to
    ``do_file`` and prints the collected names as ``(name, count)`` tuples,
    most frequent first (at most 1000 entries).

    Args:
        start_folder: Root directory of the scan. Defaults to the path the
            script was originally written for.
    """
    # Directory names that are never descended into.
    excluded_folders = {
        "venv",
        "external",
        "virtualenv",
        "site-packages",
        "node_modules",
        ".ipynb_checkpoints",
    }
    file_extensions = (".py", ".ipynb")

    result = []
    for root, dirs, files in os.walk(start_folder):
        # Pruning ``dirs`` in place is what stops os.walk from entering the
        # excluded directories; comparing the full ``root`` path against
        # bare folder names (as the old code also did) could never match.
        dirs[:] = [d for d in dirs if d not in excluded_folders]

        for file_name in files:
            if os.path.splitext(file_name)[1] not in file_extensions:
                continue
            result.extend(do_file(os.path.join(root, file_name)))

    for entry in Counter(result).most_common(1000):
        print(entry)
# Statement-start patterns. The optional leading double quote lets the same
# patterns match source lines stored as JSON strings inside .ipynb files.
_FROM_IMPORT_RE = re.compile(r'^\s*"?\s*from\s+(\S+)\s+import\b')
_PLAIN_IMPORT_RE = re.compile(r'^\s*"?\s*import\s+(.+)')


def _top_level_modules(target):
    """Return the top-level package names found in an import target string."""
    # Notebook JSON stores the line ending as a literal backslash-n; drop it,
    # then cut off a trailing comment or a following statement.
    target = re.split(r"[#;]", target.replace("\\n", ""), maxsplit=1)[0]

    names = []
    # Split on commas BEFORE looking at dots so that "os.path, sys" keeps
    # both "os" and "sys".
    for part in target.split(","):
        tokens = part.split()
        if not tokens:
            continue
        # The first token is the module; anything after it is "as alias".
        # Strip quote residue from notebook JSON, keep only the top package.
        name = tokens[0].strip("\"'").split(".")[0]
        # Relative imports ("from . import x") leave an empty name: skip.
        if name:
            names.append(name)
    return names


def do_file(file_path):
    """Collect the top-level module names imported by one source file.

    The file is read line by line as UTF-8. Lines mentioning ``ImportError``
    are ignored. Only lines that start with an ``import`` / ``from ... import``
    statement (optionally preceded by whitespace and a double quote, as in
    notebook JSON) are considered.

    Args:
        file_path: Path of a ``.py`` or ``.ipynb`` file.

    Returns:
        A list of module names, one entry per imported module occurrence
        (duplicates are kept so that callers can count them).
    """
    result = []
    # The context manager closes the file even if decoding fails midway.
    with open(file_path, "r", encoding="utf-8") as source:
        for line in source:
            if "ImportError" in line:
                continue
            match = _FROM_IMPORT_RE.match(line) or _PLAIN_IMPORT_RE.match(line)
            if match:
                result.extend(_top_level_modules(match.group(1)))
    return result
# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment