Last active
July 21, 2019 12:21
-
-
Save dorukcan/3824c779ed51be7bb9a6666c5d25c0a5 to your computer and use it in GitHub Desktop.
A dummy `pip freeze`: scans a project's .py and .ipynb files and counts the modules they import (so dummy that I didn't simplify the main loop, but it has cool comments).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
from collections import Counter | |
def main(start_folder=r"/Users/doruk/PycharmProjects/venus"):
    """Print how often each module is imported under ``start_folder``.

    Walks the folder recursively, skips virtual-environment / vendored
    directories, hands every ``.py`` and ``.ipynb`` file to ``do_file`` and
    prints the resulting ``(module, count)`` pairs, most frequent first
    (at most 1000 entries).

    Args:
        start_folder: Root directory to scan. Defaults to the path the
            script previously had hard-coded, so a bare ``main()`` call
            behaves as before.
    """
    # Directory names that are never descended into.
    excluded_folders = {
        "venv",
        "external",
        "virtualenv",
        "site-packages",
        "node_modules",
        ".ipynb_checkpoints",
    }
    file_extensions = {".py", ".ipynb"}

    result = []
    for root, dirs, files in os.walk(start_folder):
        # Prune in place so os.walk never enters an excluded directory.
        # Matching on the directory *name* is what does the work here:
        # `root` is a full path, so comparing it against these bare names
        # would never match.
        dirs[:] = [d for d in dirs if d not in excluded_folders]

        for file_name in files:
            if os.path.splitext(file_name)[1] not in file_extensions:
                continue
            result.extend(do_file(os.path.join(root, file_name)))

    for entry in Counter(result).most_common(1000):
        print(entry)
# Patterns tried in order against every line; the first one that matches
# wins. They are deliberately unanchored (used with ``search``) so that an
# import statement embedded in a quoted notebook source line is found too.
_IMPORT_PATTERNS = (
    re.compile(r"from (.*) import (.*)"),
    re.compile(r"import (.*)"),
)

# Leading identifier of a single import target. Stopping at the first
# non-identifier character drops ".submodule", " as alias", trailing
# comments and notebook residue such as a literal `\n",`.
_TOP_LEVEL_NAME = re.compile(r"\s*([^\W\d]\w*)")


def do_file(file_path):
    """Return the top-level module names imported in one source file.

    The file is scanned line by line with regular expressions (it is not
    parsed), so the result is a best-effort list. Names are returned in
    order of appearance and may repeat; the caller is expected to count them.

    Args:
        file_path: Path of a ``.py`` or ``.ipynb`` file, read as UTF-8.

    Returns:
        A list of module names, e.g. ``["os", "numpy"]``. Relative imports
        (``from . import x``) contribute nothing because they have no
        top-level package name.
    """
    result = []

    # `with` guarantees the handle is closed even if reading fails, and
    # errors="replace" keeps one badly encoded file from aborting the scan.
    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
        for line in f:
            # Skip `except ImportError:` style lines, which are not imports.
            if "ImportError" in line:
                continue

            for pattern in _IMPORT_PATTERNS:
                check = pattern.search(line)
                if not check:
                    continue

                # Split on commas first ("import abc.def, ghi"), then
                # reduce every target to its top-level package name.
                for target in check.group(1).split(","):
                    name = _TOP_LEVEL_NAME.match(target)
                    if name:
                        result.append(name.group(1))
                break

    return result
# Script entry point: start the scan only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment