#!/usr/bin/env python3
"""Repacks entries in a ZIP file so that they become correctly zipimportable \
in Python 3.5. See PEP 3147 for the __pycache__ naming scheme involved."""
import argparse
from pathlib import Path
import importlib._bootstrap_external
import sys
import zipfile
def parse_args():
    """Parse the command line and return the resulting namespace.

    The namespace carries two attributes used by the script entry point:

    * ``zipfiles`` -- the ZIP files to repack, one `Path` per argument;
    * ``strip`` -- whether .py files with a matching .pyc should be dropped.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    # NOTE(review): `type=Path` and `nargs='+'` are inferred from the callers,
    # which iterate over `args.zipfiles` and use pathlib methods on each item.
    parser.add_argument(
        'zipfiles',
        type=Path,
        nargs='+',
        help='paths to existing ZIP files',
    )
    # NOTE(review): `store_true` is inferred from `strip` being used as a
    # boolean switch that defaults to False.
    parser.add_argument(
        '--strip', '-s',
        action='store_true',
        help='remove .py files when a corresponding .pyc file is present',
    )
    return parser.parse_args()
def is_suffix_valid(suffix):
    """Tell whether `suffix` should be kept as part of a module name.

    We're stripping parts of the filename that are put by PEP-3147, so any
    suffix that starts with an interpreter tag, an optimization tag or a
    bytecode extension is reported as invalid.

    Returns False for such suffixes and True for everything else, including
    the empty string.
    """
    # str.startswith accepts a tuple, so one call covers every prefix.
    return not suffix.startswith(
        ('.cpython-', '.pypy-', '.opt-', '.pyc', '.pyo')
    )
def redo_path(path):
    """redo_path(Path) -> Path

    Converts paths like fb303/__pycache__/__init__.cpython-35.pyc into
    fb303/__init__.pyc: the cache tags are stripped from the file name and
    the file is moved out of its `__pycache__` directory into the parent.

    Raises AssertionError when the file does not sit directly inside a
    directory named `__pycache__`.
    """
    # Peel suffixes off the right until what is left is the bare module name.
    stem = Path(path.stem)
    while not is_suffix_valid(stem.suffix):
        stem = Path(stem.stem)
    assert path.parent.name == '__pycache__', (
        'Unexpected subdirectories in __pycache__: {}'.format(path.parent)
    )
    # Replacing the `__pycache__` component with the new file name places the
    # .pyc next to where its source module would live.
    return path.parent.with_name(stem.name + '.pyc')
def scan_file(path, strip=False, compression=zipfile.ZIP_DEFLATED):
    """Repackage a ZIP file so that it's zip-importable by Python 3.5.

    Entries found inside `__pycache__` directories are renamed with
    `redo_path`; every other entry keeps its name. The result is written
    next to the input with `.new` inserted in front of the original suffix,
    and that new path is returned.

    If `strip` is True, .py files are removed when corresponding .pyc files
    can be found.

    File order and all attributes of the ZIP entries are maintained, except
    for the compression method which is always set to `compression`.

    Any bytes that precede the first ZIP entry in the input are copied
    verbatim to the start of the output. A summary of what was found is
    printed to standard output.

    Raises OSError if the input ends before those leading bytes could be
    read in full, in addition to whatever opening the files may raise.
    """
    pure_py_files = set()
    cached_py_files = set()
    renames = {}
    suffix = path.suffix
    zip_offset = -1
    with path.open('rb') as zf, zipfile.ZipFile(zf) as z:
        for info in z.infolist():
            p = Path(info.filename)
            if p.match('*.py'):
                pure_py_files.add(p)
            elif p.match('**/__pycache__/*') or p.match('__pycache__/*'):
                flat_path = redo_path(p)
                # Record the source name this bytecode file stands for, so
                # the two sets can be compared and `strip` can look it up.
                cached_py_files.add(flat_path.with_suffix('.py'))
                info.filename = str(flat_path)
                renames[p] = info
            # Track the smallest header offset: everything before it is not
            # part of any entry and has to be carried over untouched.
            if zip_offset == -1 or info.header_offset < zip_offset:
                zip_offset = info.header_offset

    print('Renames to be done:', len(renames))
    print('.py files:', len(pure_py_files))
    print('.pyc files:', len(cached_py_files))
    without_cache = pure_py_files - cached_py_files
    print('.py files without cache:', len(without_cache))
    for p in sorted(without_cache):
        print(' ', p)
    without_source = cached_py_files - pure_py_files
    print('.pyc files without source:', len(without_source))
    for p in sorted(without_source):
        print(' ', p)

    new_path = path.with_suffix('.new' + suffix)
    with path.open('rb') as sf:
        preamble = b''
        while len(preamble) < zip_offset:
            chunk = sf.read(zip_offset - len(preamble))
            if not chunk:
                # Without this guard a short read at EOF would loop forever.
                raise OSError(
                    'Unexpected end of file while reading the data that '
                    'precedes the ZIP entries in {}'.format(path)
                )
            preamble += chunk
        with new_path.open('wb') as tf:
            tf.write(preamble)
        # Mode 'a' appends a fresh archive after the bytes written above.
        with zipfile.ZipFile(sf) as sz, \
                zipfile.ZipFile(str(new_path), 'a',
                                compression=compression) as tz:
            for info in sz.infolist():
                p = Path(info.filename)
                if strip and p.match('*.py') and p in cached_py_files:
                    continue
                # Read under the original name before swapping in the
                # renamed entry description.
                content = sz.read(info)
                info = renames.get(p, info)
                info.compress_type = compression
                tz.writestr(info, content)
    return new_path
if __name__ == '__main__':
    # Repack every file named on the command line. A file that cannot be
    # processed because of an OS-level error is reported and counted, and the
    # remaining files are still attempted.
    args = parse_args()
    failures = 0
    for fp in args.zipfiles:
        try:
            scan_file(fp, strip=args.strip)
        except OSError as exc:
            print('Failed to repack {}: {}'.format(fp, exc), file=sys.stderr)
            failures += 1
    # Surface the outcome to the calling shell instead of dropping the count.
    sys.exit(1 if failures else 0)