sinchantsao/depressZipFile.py

## depressZipFile.py
# coding=utf8
# !/usr/bin/python2.7

import chardet
import os
import zipfile

def decompressChar(string, encode=None):
    """Normalize a file name to a UTF-8 encoded byte string (Python 2 only).

    Args:
        string: ``unicode`` text or an encoded byte ``str``.
        encode: optional codec name that ``string`` is encoded with. When
            given, automatic detection with chardet is skipped.

    Returns:
        * ``unicode`` input: the text encoded as UTF-8.
        * ``encode`` given: the ``unicode`` object decoded with that codec.
          NOTE(review): unlike the other branches this is not re-encoded to
          UTF-8; kept as-is for backward compatibility -- confirm with
          callers whether that is intended.
        * otherwise: the bytes decoded with the encoding chardet reports and
          re-encoded as UTF-8. The input is returned unchanged when chardet
          reports no encoding, or when decoding fails for any codec other
          than gb2312.

    Raises:
        UnicodeDecodeError: if the gb2312 decode failed and the GB18030
            retry fails as well.
    """
    try:
        if isinstance(string, unicode):
            string = string.encode("UTF8")
        elif encode is not None:
            string = unicode(string, encode)
        else:
            encodeTyp = chardet.detect(string)["encoding"]
            if encodeTyp is not None:
                string = string.decode(encodeTyp).encode("UTF8")
    except UnicodeDecodeError as e:
        # A gb2312 decode failure usually means the text uses characters
        # outside that charset; GB18030 is a superset, so retry with it.
        # Check the codec name explicitly via ``e.encoding`` instead of the
        # deprecated ``'gb2312' in e`` (which iterated over ``e.args``).
        if e.encoding.lower() == 'gb2312':
            string = string.decode('GB18030').encode("UTF8")
    return string


def zipfilePreprocess(zipfilePath, ):
    """Write the members of a zip archive into the current directory.

    Each member is written under its base name only (directory components
    are dropped), with the name normalized by ``decompressChar``.

    Args:
        zipfilePath: path of the zip archive to open.

    Returns:
        None, without extracting anything, when any member of the archive
        has the encryption flag set. NOTE(review): the end of this function
        is cut off in the visible source, so the return value on the normal
        path cannot be confirmed from here.
    """

    with zipfile.ZipFile(zipfilePath, ) as zipfileHandler:
        # Check whether the zip file is encrypted; encrypted archives are
        # not processed at all. See the zipfile module source for how the
        # flag is evaluated.
        # refer -> zipfile source code line#983
        # In Python 2 ``filter`` returns a list, so this is truthy as soon as
        # one member has flag bit 0x01 set.
        if filter(lambda x: x.flag_bits & 0x01, zipfileHandler.infolist()):
            return

        # Extract each file name and its corresponding entry in the zip
        # file; the archive keeps them in a dict keyed by member name.
        members = [(f, member) for f, member in zipfileHandler.NameToInfo.iteritems()]
        for subFname, member in members:
            # Keep only the last path component of the member name.
            subFname = os.path.split(subFname)[-1]
            # change path by yourself, default current path
            subFilePath = os.path.join("./", decompressChar(subFname))
            # Writing the file ourselves is how garbled names are handled;
            # this is the key step.
            # Garbled names from the zipfile module mostly show up with
            # archives created by WinZip; see the links below for details.
            # Do not use zipfile.extractall() to unzip, otherwise file names
            # containing Chinese characters end up garbled.
            # refer -> https://docs.python.org/2/library/zipfile.html#zipfile.ZipFile.write
            #       -> zipfile source line#1082
            # NOTE(review): ``shutil`` is not among the visible imports
            # (chardet, os, zipfile), and the copy call below is cut off
            # mid-identifier in this view -- confirm against the full file.
            with open(subFilePath, 'wb') as subFileHandler:
                shutil.copyfileobj(zipfileHandler.open(member), subFileHandl
	# coding=utf8
	# !/usr/bin/python2.7

	import chardet
	import os
	import zipfile

	def decompressChar(string, encode=None):
	    """Normalize a file name to a UTF-8 encoded byte string (Python 2 only).

	    Args:
	        string: ``unicode`` text or an encoded byte ``str``.
	        encode: optional codec name that ``string`` is encoded with. When
	            given, automatic detection with chardet is skipped.

	    Returns:
	        * ``unicode`` input: the text encoded as UTF-8.
	        * ``encode`` given: the ``unicode`` object decoded with that codec.
	          NOTE(review): unlike the other branches this is not re-encoded to
	          UTF-8; kept as-is for backward compatibility -- confirm with
	          callers whether that is intended.
	        * otherwise: the bytes decoded with the encoding chardet reports and
	          re-encoded as UTF-8. The input is returned unchanged when chardet
	          reports no encoding, or when decoding fails for any codec other
	          than gb2312.

	    Raises:
	        UnicodeDecodeError: if the gb2312 decode failed and the GB18030
	            retry fails as well.
	    """
	    try:
	        if isinstance(string, unicode):
	            string = string.encode("UTF8")
	        elif encode is not None:
	            string = unicode(string, encode)
	        else:
	            encodeTyp = chardet.detect(string)["encoding"]
	            if encodeTyp is not None:
	                string = string.decode(encodeTyp).encode("UTF8")
	    except UnicodeDecodeError as e:
	        # A gb2312 decode failure usually means the text uses characters
	        # outside that charset; GB18030 is a superset, so retry with it.
	        # Check the codec name explicitly via ``e.encoding`` instead of the
	        # deprecated ``'gb2312' in e`` (which iterated over ``e.args``).
	        if e.encoding.lower() == 'gb2312':
	            string = string.decode('GB18030').encode("UTF8")
	    return string


	def zipfilePreprocess(zipfilePath, ):

	with zipfile.ZipFile(zipfilePath, ) as zipfileHandler:
	# 检查zip文件是否进行了加密,此处可以参考源码的实现(对于加密文件不做处理)
	# check zipfile whether encrypted or not(do nothing for encrypted file here)
	# refer -> zipfile source code line#983
	if filter(lambda x: x.flag_bits & 0x01, zipfileHandler.infolist()):
	return

	# 提取zip文件当中的各文件名和对应的文件内容,其实质上文件内容就是以字典结构存储在dict当中
	# Extract each file name and corresponding file content in the zip file
	members = [(f, member) for f, member in zipfileHandler.NameToInfo.iteritems()]
	for subFname, member in members:
	subFname = os.path.split(subFname)[-1]
	# change path by yourself, default current path
	subFilePath = os.path.join("./", decompressChar(subFname))
	# 通过文件写入的方式来对乱码处理,这是最关键的一步
	# 对于会出现zipfile模块解压zip文件乱码主要是winzip压缩的文件出现此类情况比较多,具体解释可以参考下面注释链接当中的内容说明
	# it is point for processing messy char
	# do not use zipfile.extractall() to unzip,otherwise filename,contained chinese, will be a mess after that. zipped by WinZip specially.
	# refer -> https://docs.python.org/2/library/zipfile.html#zipfile.ZipFile.write
	# -> zipfile source line#1082
	with open(subFilePath, 'wb') as subFileHandler:
	shutil.copyfileobj(zipfileHandler.open(member), subFileHandl