Instantly share code, notes, and snippets.

What would you like to do?
ZipInfo holds the filename as bytes. This causes mojibake for zip files that contain SJIS-encoded names: the second byte of a dame-moji is incorrectly normalized from \ to /. This module intercepts the assignment so that the filename is held as unicode, and handles such zip files better.
# Extended zipfile module that handles SJIS dame-moji correctly.
from zipfile import *
def cp932_invert(cp932_path):
    """Decode a CP932 path whose dame-moji trail bytes were rewritten to '/'.

    A "dame-moji" is a double-byte CP932 character whose second byte is
    0x5C (the ASCII backslash).  When a path separator normalization has
    replaced every backslash byte with '/', those characters are broken.
    This function undoes that: every '/' byte that directly follows a
    non-printable byte is turned back into a backslash byte, the result is
    decoded as CP932, and any backslash left in the decoded text is then
    normalized to '/'.

    A genuine '/' separator that follows a non-printable trail byte is also
    turned into a backslash temporarily; it decodes as a standalone
    backslash and is restored by the final replacement.

    Args:
        cp932_path: the path as a byte string (``str`` on Python 2,
            ``bytes`` on Python 3).

    Returns:
        The decoded path as unicode text, using '/' as the only separator.

    Raises:
        UnicodeDecodeError: if the repaired bytes are not valid CP932.
    """
    from string import printable

    # Work on integer byte values so the same code handles Python 2 str
    # and Python 3 bytes.
    printable_bytes = frozenset(bytearray(printable.encode('ascii')))
    raw = bytearray(cp932_path)
    for i in range(1, len(raw)):
        # In-place edits are safe: a rewritten byte is still printable, so
        # it never changes the verdict for the byte that follows it.
        if raw[i] == 0x2F and raw[i - 1] not in printable_bytes:
            raw[i] = 0x5C
    return bytes(raw).decode('cp932').replace(u'\\', u'/')
def as_unicode_path(path):
    """Return *path* as unicode text, guessing its encoding if necessary.

    Text input is returned unchanged.  Byte-string input is decoded with
    the first encoding in the candidate list that succeeds.  If none of
    them works, the path is assumed to be CP932 with damaged dame-moji and
    is handed to ``cp932_invert``.  If that fails as well, the original
    byte string is returned untouched.

    Args:
        path: a unicode string or a byte string.

    Returns:
        The decoded unicode path, or *path* itself when it cannot be
        decoded.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(path, text_type):
        return path
    for enc in ('utf-8', 'sjis', 'cp932', 'euc-jp', 'iso-2022-jp'):
        try:
            return path.decode(enc)
        except UnicodeDecodeError:
            # Wrong guess: move on to the next candidate encoding.
            continue
    try:
        # assume cp932 encoding including dame-moji
        return cp932_invert(path)
    except UnicodeDecodeError:
        # Nothing worked; keep the raw bytes rather than failing.
        return path
def __setattr__(self, name, value):
    """Attribute setter installed on ZipInfo.

    Assignments to ``filename`` are passed through ``as_unicode_path``
    before being stored; every other attribute is stored as given.
    """
    stored = as_unicode_path(value) if name == 'filename' else value
    object.__setattr__(self, name, stored)


# Install the hook so every ZipInfo instance uses it.
ZipInfo.__setattr__ = __setattr__
def infolist(self):
    """Return the archive's entries, leaving out directory entries.

    An entry is treated as a directory when its filename ends with '/'.
    Entries with an empty filename are kept.

    Returns:
        A new list with the remaining items of ``self.filelist``, in their
        original order.
    """
    # endswith() is safe for an empty filename, unlike indexing [-1].
    return [info for info in self.filelist
            if not info.filename.endswith('/')]


# Replace the stock ZipFile.infolist with the filtering version.
ZipFile.infolist = infolist
def namelist(self):
    """Return the filenames of the entries reported by ``self.infolist()``.

    Returns:
        A list of filenames, in the order ``infolist()`` yields them.
    """
    # A list comprehension gives a real list on Python 2 and Python 3 alike.
    return [info.filename for info in self.infolist()]


# Replace the stock ZipFile.namelist so it follows the patched infolist.
ZipFile.namelist = namelist
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment