Last active
December 6, 2019 12:21
-
-
Save sinchantsao/bf0aef6a83526b038d3194de0540bf03 to your computer and use it in GitHub Desktop.
解决zipfile解压中文压缩包乱码问题,以及文件加密判断,这里只做解压乱码的问题处理,未实现加密文件解压
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding=utf8 | |
# !/usr/bin/python2.7 | |
import os
import shutil
import zipfile

import chardet
def decompressChar(string, encode=None):
    """Best-effort conversion of a (possibly mis-encoded) name to UTF-8.

    Behaviour per input:

    * text (``unicode`` on Python 2, ``str`` on Python 3) is encoded to
      UTF-8 bytes;
    * a byte string with an explicit ``encode`` is decoded with that codec
      and the decoded *text* is returned;
    * a byte string without ``encode`` has its encoding guessed by
      ``chardet`` and is re-encoded to UTF-8 bytes. If chardet cannot name
      an encoding the input is returned unchanged.

    If decoding with a GB2312/GBK codec fails, the bytes are retried as
    GB18030 and returned as UTF-8 bytes. Any other decode failure returns
    the input unchanged.

    NOTE(review): the explicit-``encode`` branch returns text while every
    other successful branch returns UTF-8 bytes. That asymmetry is kept so
    existing callers are not broken -- confirm whether it is intentional.

    :param string: text or byte string to normalise.
    :param encode: optional codec name used to decode a byte string.
    :return: the converted value, or ``string`` itself when it cannot be
        converted.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3, without naming a builtin
    # that only exists on one of them.
    text_type = type(u"")
    try:
        if isinstance(string, text_type):
            return string.encode("UTF8")
        if encode is not None:
            return text_type(string, encode)
        encodeTyp = chardet.detect(string)["encoding"]
        if encodeTyp is not None:
            return string.decode(encodeTyp).encode("UTF8")
    except UnicodeDecodeError as e:
        # This usually happens because the GB2312/GBK repertoire is smaller
        # than GB18030. Inspect the codec name recorded on the exception;
        # testing membership on the exception object itself only worked via
        # a deprecated Python 2 quirk and raises TypeError on Python 3.
        if e.encoding.lower() in ("gb2312", "gbk"):
            try:
                return string.decode("GB18030").encode("UTF8")
            except UnicodeDecodeError:
                # Not valid GB18030 either: fall through and hand the input
                # back untouched, like every other unrecoverable case.
                pass
    return string
def zipfilePreprocess(zipfilePath):
    """Extract the files of an unencrypted zip archive into the current directory.

    Each member is written out by hand, under a name passed through
    ``decompressChar``, instead of using ``ZipFile.extractall()``. According
    to the original author's notes, ``extractall()`` leaves Chinese file
    names garbled, most often for archives created with WinZip.

    Only the last path component of each member name is used, so the
    archive's directory layout is flattened and members sharing a base name
    overwrite one another. Change the ``"./"`` prefix below to extract
    somewhere else.

    :param zipfilePath: path of the zip archive to extract.
    :return: always ``None``; if any member is encrypted nothing is
        extracted.
    """
    with zipfile.ZipFile(zipfilePath) as zipfileHandler:
        members = zipfileHandler.infolist()

        # Bit 0 of a member's general-purpose flags marks it as encrypted.
        # Encrypted archives are not handled here, so bail out untouched.
        if any(member.flag_bits & 0x01 for member in members):
            return

        for member in members:
            subFname = os.path.split(member.filename)[-1]
            if not subFname:
                # Directory entries end with "/" and leave no base name;
                # there is nothing to write for them.
                continue

            # Change the target directory here; defaults to the current one.
            subFilePath = os.path.join("./", decompressChar(subFname))

            # Writing the bytes ourselves under the repaired name is the key
            # step that avoids the garbled names. The member stream is opened
            # first and closed deterministically, so a member that cannot be
            # opened does not leave an empty output file behind.
            with zipfileHandler.open(member) as source, \
                    open(subFilePath, 'wb') as subFileHandler:
                shutil.copyfileobj(source, subFileHandler)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
程序没有实际运行,这段代码是我从自己的项目中抽取出来稍作修改后的版本,总体处理逻辑如此
This program has not actually been run; the code is a slightly modified version extracted from my own project, and it shows the overall processing logic.