Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Last active September 30, 2021 07:39
Show Gist options
  • Save UserUnknownFactor/4003c2110d2eecf8286ca6a7b245d831 to your computer and use it in GitHub Desktop.
Save UserUnknownFactor/4003c2110d2eecf8286ca6a7b245d831 to your computer and use it in GitHub Desktop.
Python utility to unpack zips with unusual file name encoding
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# unzip-enc.py
import argparse
import codecs
import locale
import os
import shutil
import sys
import zipfile

from charamel import Detector
def setup_console(sys_enc="utf-8"):
    """Make console output tolerant of unencodable names; return the console encoding.

    The console encoding is the OEM code page on Windows. Elsewhere it is the
    encoding of whichever of stdout/stderr is a terminal, falling back to the
    file-system encoding and finally to *sys_enc*.

    On Python 2 the default string encoding is switched to *sys_enc* and
    terminal streams whose encoding differs from the console encoding are
    wrapped in a codec writer that replaces unencodable characters.

    On Python 3 the streams are already text streams, so wrapping them in a
    byte-oriented codec writer would break ``print``. Only their error handler
    is switched to ``"replace"`` and their encoding is left untouched.

    Args:
        sys_enc: Encoding used as the Python 2 default encoding and as the
            last-resort console encoding.

    Returns:
        The console encoding name, or ``None`` if it could not be determined
        (console tuning is best-effort and must never stop the script).
    """
    try:
        is_py2 = sys.version_info < (3, 0)
        if is_py2:
            # site.py deletes sys.setdefaultencoding at start-up; reloading
            # the module is the only way to get it back on Python 2.
            reload(sys)  # noqa: F821 - builtin on Python 2 only

        if sys.platform.startswith("win"):
            import ctypes
            enc = "cp{}".format(ctypes.windll.kernel32.GetOEMCP())
        else:
            enc = (sys.stdout.encoding if sys.stdout.isatty() else
                   sys.stderr.encoding if sys.stderr.isatty() else
                   sys.getfilesystemencoding() or sys_enc)

        if is_py2:
            sys.setdefaultencoding(sys_enc)
            if sys.stdout.isatty() and sys.stdout.encoding != enc:
                sys.stdout = codecs.getwriter(enc)(sys.stdout, 'replace')
            if sys.stderr.isatty() and sys.stderr.encoding != enc:
                sys.stderr = codecs.getwriter(enc)(sys.stderr, 'replace')
        else:
            for stream in (sys.stdout, sys.stderr):
                # reconfigure() exists on io.TextIOWrapper since Python 3.7;
                # replaced or older streams are simply left alone.
                reconfigure = getattr(stream, "reconfigure", None)
                if reconfigure is not None and stream.isatty():
                    reconfigure(errors="replace")
        return enc
    except Exception:
        # Best-effort: a console that cannot be tuned is not fatal.
        return None
# Script body: parse the command line, then list or extract every member of
# the archive after recovering its real file name.
console_chs = setup_console()

parser = argparse.ArgumentParser()
parser.add_argument("-e", help="encoding for file names like cp932, autodetected by default", metavar=('encoding'))
parser.add_argument("-l", help="list file names in a zip file but don't unzip", action="store_true")
parser.add_argument("file", help="zip archive to unpack", metavar=('filename'))
args = parser.parse_args()

zfn = args.file
print("Processing " + zfn + " ...")
detector = Detector()

# General-purpose flag bit 11: the member name is stored as UTF-8.
_UTF8_NAME_FLAG = 0x800


def _safe_relpath(member):
    """Return *member* as a relative path that stays inside the current directory.

    Applies the same sanitising as ``zipfile.ZipFile.extract``: separators are
    normalised, any drive prefix is stripped, and empty, ``.`` and ``..``
    components are dropped. The result is ``""`` when nothing usable remains.
    """
    path = member.replace("/", os.path.sep)
    if os.path.altsep:
        path = path.replace(os.path.altsep, os.path.sep)
    path = os.path.splitdrive(path)[1]
    unsafe_parts = ("", os.path.curdir, os.path.pardir)
    return os.path.sep.join(
        part for part in path.split(os.path.sep) if part not in unsafe_parts
    )


with zipfile.ZipFile(zfn, "r") as zf:
    for info in zf.infolist():
        name = info.filename

        if info.flag_bits & _UTF8_NAME_FLAG:
            # zipfile has already decoded this name from UTF-8; pushing it
            # through cp437 with 'ignore' would drop characters.
            enc = "utf-8"
            utf8name = name
        else:
            # zipfile decodes unflagged names as cp437, so encoding back to
            # cp437 recovers the bytes stored in the archive.
            rawname = name.encode('cp437', 'ignore')
            detected = args.e if args.e else detector.detect(rawname)
            # NOTE(review): the detector presumably returns an enum member
            # carrying the codec name in .value, or None - confirm against
            # charamel. Plain strings from -e pass through unchanged.
            enc = getattr(detected, "value", detected)
            try:
                utf8name = rawname.decode(enc)
            except (UnicodeDecodeError, LookupError, TypeError):
                # Wrong, unknown or missing encoding: keep the cp437 name.
                print("Error [" + str(enc) + "] " + name)
                utf8name = name
        label = str(enc)

        if args.l:
            # Show the decoded name so the listing previews what extraction
            # would create.
            print("[" + label + "] " + utf8name)
            continue

        print("Extracting [" + label + "]: " + utf8name)
        target = _safe_relpath(utf8name)
        if not target:
            # Nothing usable left after dropping unsafe path components.
            continue

        if name.endswith("/"):
            # Directory entry: create it, there is no data to write.
            if not os.path.isdir(target):
                os.makedirs(target)
            continue

        pathname = os.path.dirname(target)
        if pathname and not os.path.isdir(pathname):
            os.makedirs(pathname)
        # Existing files are never overwritten.
        if not os.path.exists(target):
            # Stream the member so large files are not held in memory.
            with zf.open(info) as src, open(target, 'wb') as fout:
                shutil.copyfileobj(src, fout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment