Last active
December 26, 2015 15:29
-
-
Save dstufft/7173539 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import collections | |
import json | |
import os | |
import tarfile | |
import zipfile | |
# Directory tree that the scan below walks looking for source archives.
BASE_DIR = "bandersnatch/data/web/packages/source"

# Running tallies, keyed by human-readable label (e.g. "total files").
counter = collections.Counter()

# Names of every project directory in which at least one archive was scanned.
projects = set()
def _tarball_dependency_links(filepath):
    """Report whether a tar archive ships a non-empty dependency_links.txt.

    The archive is scanned in member order.  The first member whose name
    ends with ``dependency_links.txt`` and that can be read as a file
    decides the result.

    Args:
        filepath: Path to a tar archive (any compression ``tarfile.open``
            can auto-detect).

    Returns:
        True if the deciding member contains anything other than
        whitespace; False if it is blank, if no such member exists, or if
        the archive cannot be read as a tarball.
    """
    try:
        with tarfile.open(filepath) as tar:
            for name in tar.getnames():
                if not name.endswith("dependency_links.txt"):
                    continue
                dep_links = tar.extractfile(name)
                if dep_links is None:
                    # extractfile() yields None for members that are not
                    # regular files or links (e.g. a directory with a
                    # matching name); keep looking instead of crashing.
                    continue
                with dep_links:
                    return bool(dep_links.read().strip())
    except tarfile.ReadError:
        # Unreadable archives are deliberately treated as "no links" so a
        # single bad file does not abort the whole scan.
        return False
    return False
def _zipfile_dependency_links(filepath):
    """Report whether a zip archive ships a non-empty dependency_links.txt.

    The first entry whose name ends with ``dependency_links.txt`` decides
    the result.

    Args:
        filepath: Path to a zip archive.

    Returns:
        True if that entry contains anything other than whitespace; False
        if it is blank, if no such entry exists, or if the file is not a
        valid zip archive.
    """
    try:
        with zipfile.ZipFile(filepath) as zfile:
            for name in zfile.namelist():
                if name.endswith("dependency_links.txt"):
                    with zfile.open(name) as dep_links:
                        return bool(dep_links.read().strip())
    except zipfile.BadZipFile:
        # Mirror the tarball helper: an unreadable archive counts as
        # "no links" rather than aborting the whole scan.
        return False
    return False
# Scan every recognised source archive under BASE_DIR and tally, per file and
# per project, whether it carries a non-empty dependency_links.txt.

# Suffixes handled by the tarball helper; ".zip" goes to the zip helper.
_TARBALL_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz")

# project name -> True once ANY of its archives has dependency links.  The
# per-project verdict is accumulated over all archives so that it does not
# depend on the order in which os.walk happens to list the files.
_project_has_links = {}

for root, _dirnames, filenames in os.walk(BASE_DIR):
    for filename in filenames:
        filepath = os.path.join(root, filename)
        if filepath.endswith(_TARBALL_SUFFIXES):
            dep_link = _tarball_dependency_links(filepath)
        elif filepath.endswith(".zip"):
            dep_link = _zipfile_dependency_links(filepath)
        else:
            # Not an archive type this script understands.
            continue

        # The project name is the directory that directly holds the archive.
        project = os.path.basename(os.path.dirname(filepath))
        projects.add(project)
        _project_has_links[project] = (
            _project_has_links.get(project, False) or bool(dep_link)
        )

        counter["total files"] += 1
        if dep_link:
            counter["file has dependency links"] += 1
        else:
            counter["file has no dependency links"] += 1

# Per-project tallies are derived after the walk, once every archive of a
# project has been seen.
for _has_links in _project_has_links.values():
    counter["total projects"] += 1
    if _has_links:
        counter["projects dependency links"] += 1
    else:
        counter["projects no dependency links"] += 1

print(counter)

# NOTE(review): this writes EVERY scanned project, not only those with
# dependency links, despite the output file name -- confirm that is intended.
with open("dependency_links.json", "w") as fjson:
    json.dump(sorted(projects), fjson, indent=4)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment