Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
# coding=utf8
# !/usr/bin/python2.7
import chardet
import os
import zipfile
def decompressChar(string, encode=None):
    """Return ``string`` normalized to UTF-8 encoded bytes.

    :param string: a text (unicode) object or an encoded byte string.
    :param encode: name of the encoding of ``string`` when it is a byte
        string; when ``None`` the encoding is guessed with
        ``chardet.detect``.
    :return: the UTF-8 encoded bytes. The input byte string is returned
        unchanged when no encoding could be detected, or when decoding
        fails for an encoding other than GB2312.
    :raises UnicodeDecodeError: if the GB18030 fallback cannot decode the
        data either.
    """
    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
    # the text check does not depend on the Python-2-only ``unicode`` name.
    textType = type(u"")
    if isinstance(string, textType):
        return string.encode("UTF8")

    encodeTyp = encode
    if encodeTyp is None:
        encodeTyp = chardet.detect(string)["encoding"]
        if encodeTyp is None:
            # Detection failed: hand the bytes back untouched.
            return string

    try:
        return string.decode(encodeTyp).encode("UTF8")
    except UnicodeDecodeError as e:
        # This error usually appears because the reported encoding covers
        # fewer characters than GB18030, so retry with that superset.
        if 'gb2312' in e.encoding.lower():
            return string.decode('GB18030').encode("UTF8")
        return string
def zipfilePreprocess(zipfilePath, ):
with zipfile.ZipFile(zipfilePath, ) as zipfileHandler:
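# Check whether the zip file is encrypted (encrypted files are not processed here).
# See the zipfile module source code, line#983, for how the encryption flag is read.
# NOTE(review): the condition below is truthy when at least one entry HAS the 0x01 flag bit set - confirm whether it should be negated.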
if filter(lambda x: x.flag_bits & 0x01, zipfileHandler.infolist()):
# Extract each file name and its corresponding entry from the zip file; zipfile stores this mapping in a dict (NameToInfo).
members = [(f, member) for f, member in zipfileHandler.NameToInfo.iteritems()]
for subFname, member in members:
subFname = os.path.split(subFname)[-1]
# change path by yourself, default current path
subFilePath = os.path.join("./", decompressChar(subFname))
# Writing each member out manually is the key step for handling garbled (mojibake) file names.
# Garbled names after unzipping with the zipfile module occur mostly with archives created by WinZip; see the reference below for details.
# Do not use zipfile.extractall() to unzip: file names containing Chinese characters come out garbled, especially for archives zipped by WinZip.
# refer ->
# -> zipfile source line#1082
with open(subFilePath, 'wb') as subFileHandler:
shutil.copyfileobj(, subFileHandl

This comment has been minimized.

Copy link
Owner Author

@sinchantsao sinchantsao commented Dec 6, 2019

This program has not actually been run; the code is a slightly modified version of what I extracted from my project.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment