Created
May 17, 2015 10:19
-
-
Save betaveros/cea1a430266ae8f124c8 to your computer and use it in GitHub Desktop.
Magically fix (most of) a zip archive I received whose filenames were corrupted.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import codecs
import errno
import os
import shutil
import zipfile
def magic(fname):
    """Return *fname* as text, decoding raw byte names as Shift-JIS.

    Names that are already text are returned unchanged.  Byte strings are
    decoded as Shift-JIS with the ``'replace'`` error handler, so bytes that
    are not valid Shift-JIS become U+FFFD instead of raising.

    Raises:
        TypeError: if *fname* is neither a text string nor a byte string.
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, which
    # avoids naming a builtin that only exists on one of them.
    if isinstance(fname, type(u'')):
        return fname
    # Explicit raise rather than ``assert``: asserts vanish under ``-O``.
    if not isinstance(fname, bytes):
        raise TypeError(
            'fname must be a text or byte string, got %s'
            % type(fname).__name__
        )
    return codecs.decode(fname, 'shift-jis', 'replace')
# Extract every file stored in Chinese.zip into ./out, writing each one under
# the name produced by magic() instead of the name recorded in the archive.
#
# NOTE(review): this relies on zipfile handing back the raw byte-string name
# for entries that lack the UTF-8 flag, which is the Python 2 behaviour --
# confirm before running under Python 3.


def _ensure_dir(path):
    """Create *path* and any missing parents; succeed if it already exists."""
    try:
        os.makedirs(path)
    except OSError as exc:
        # Only swallow "already exists", and only when it really is a
        # directory; anything else (permissions, a file in the way) is real.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


# Absolute form of the output directory, used to reject entries whose decoded
# name would land outside it.  Kept as text so it compares cleanly with the
# text names returned by magic().
_OUT_ROOT = os.path.abspath(u'out')

with zipfile.ZipFile('Chinese.zip') as zipf:
    for info in zipf.infolist():
        magicname = magic(info.filename)
        # Zip member names use '/' as the separator on every platform and
        # directory entries end with it.  Testing the decoded name avoids
        # mistaking a Shift-JIS trail byte (0x5C, '\\') for a separator.
        if not magicname or magicname.endswith(u'/'):
            continue
        print(magicname)
        outfname = os.path.join(u'out', magicname)
        # The names come from the archive, so refuse absolute paths and
        # '..' components that would escape the output directory.
        if not os.path.abspath(outfname).startswith(_OUT_ROOT + os.sep):
            print(u'skipping entry that escapes the output directory: '
                  + magicname)
            continue
        # https://stackoverflow.com/questions/12517451/python-automatically-creating-directories-with-file-output
        _ensure_dir(os.path.dirname(outfname))
        with zipf.open(info) as inf:
            # Binary mode: member contents are raw bytes and must not go
            # through newline translation.
            with open(outfname, 'wb') as outf:
                shutil.copyfileobj(inf, outf)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment