Skip to content

Instantly share code, notes, and snippets.

@yatt
Created March 25, 2011 22:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yatt/887745 to your computer and use it in GitHub Desktop.
Save yatt/887745 to your computer and use it in GitHub Desktop.
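ZipInfo holds the filename as bytes, which causes mojibake for zip archives whose entry names are Shift_JIS (sjis) encoded. In particular, the second byte of a dame-moji (a character whose trailing byte is a backslash) is incorrectly normalized from \ to /. This module intercepts the assignment so that the filename is held as unicode, which lets such zip files be handled better.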
# Extended zipfile module that handles Shift_JIS (sjis) dame-moji correctly.
from zipfile import *
def cp932_invert(cp932_path):
    """Decode a cp932 path whose dame-moji trail bytes were turned into '/'.

    Every '/' byte that directly follows a non-printable byte is treated as
    a backslash (0x5C) trail byte that was rewritten to '/', so it is
    switched back to a backslash before the bytes are decoded.

    Works on a Python 2 ``str`` as well as Python 3 ``bytes``/``bytearray``.

    :param cp932_path: cp932-encoded path as a byte string.
    :returns: the decoded text path, with every remaining backslash
        replaced by '/'.
    :raises UnicodeDecodeError: if the repaired bytes are not valid cp932.
    """
    from string import printable

    slash, backslash = 0x2F, 0x5C
    printable_bytes = frozenset(bytearray(printable.encode('ascii')))
    raw = bytearray(cp932_path)
    # Start at 1: the first byte has no predecessor, so it is never rewritten.
    # Mutating in place is safe because '/' and '\\' are both printable, so a
    # rewrite never changes the verdict for the byte that follows it.
    for i in range(1, len(raw)):
        if raw[i] == slash and raw[i - 1] not in printable_bytes:
            raw[i] = backslash
    # Backslashes left after decoding were not absorbed into a two-byte
    # character, so they are turned (back) into '/'.
    return raw.decode('cp932').replace('\\', '/')
def as_unicode_path(path):
    """Return *path* as text, guessing the encoding of a byte-string path.

    The candidate encodings are tried in order and the first one that
    decodes cleanly wins.  If none does, the path is assumed to be cp932
    with mangled dame-moji and is handed to ``cp932_invert``.

    :param path: the path as text or as an encoded byte string.
    :returns: the decoded text; *path* itself when it is not a byte string
        or when every decoding attempt fails.
    """
    # Text (and anything else that is not a byte string) passes through
    # unchanged, so there is nothing to decode and nothing to fail.
    if not isinstance(path, (bytes, bytearray)):
        return path
    for enc in ('utf-8', 'sjis', 'cp932', 'euc-jp', 'iso-2022-jp'):
        try:
            return path.decode(enc)
        except UnicodeDecodeError:
            continue
    # assume cp932 encoding including dame-moji
    try:
        return cp932_invert(path)
    except UnicodeDecodeError:
        # Best effort: keep the raw bytes rather than fail the assignment.
        return path
def __setattr__(self, name, value):
    """Attribute hook that stores a ``filename`` value as unicode.

    A value assigned to ``filename`` is first passed through
    ``as_unicode_path``; every other attribute is stored untouched.
    """
    stored = as_unicode_path(value) if name == 'filename' else value
    object.__setattr__(self, name, stored)


# Install the hook so that every attribute assignment on a ZipInfo uses it.
ZipInfo.__setattr__ = __setattr__
def infolist(self):
    """Return the archive's entries, leaving out directory entries.

    An entry counts as a directory when its filename ends with '/'.

    :returns: a new list holding the non-directory items of
        ``self.filelist``, in their original order.
    """
    # endswith() copes with an empty filename, where indexing [-1] would
    # raise IndexError; the comprehension yields a real list on both
    # Python 2 and Python 3.
    return [
        info for info in self.filelist
        if not info.filename.endswith('/')
    ]


ZipFile.infolist = infolist
def namelist(self):
    """Return the filenames of the entries reported by ``self.infolist()``.

    :returns: a new list of filenames, in the order ``infolist()`` gives.
    """
    # A comprehension (not map) so the result is a real list on Python 3 too.
    return [info.filename for info in self.infolist()]


ZipFile.namelist = namelist
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment