Skip to content

Instantly share code, notes, and snippets.

@wangjiezhe
Last active February 17, 2021 14:00
Show Gist options
  • Save wangjiezhe/7841a350983a147b6d7e to your computer and use it in GitHub Desktop.
Save wangjiezhe/7841a350983a147b6d7e to your computer and use it in GitHub Desktop.
解决zip文件中文乱码问题
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
unzip3_gbk.py: Deal with zip files using encoding GB2312/GBK/GB18030
"""
import os
# import sys
import argparse
import zipfile
# import copy
import datetime
class GBKZipFile(zipfile.ZipFile):
    """Read-only ZipFile whose legacy member names are re-decoded as GB18030.

    ``zipfile`` decodes member names that are not flagged as UTF-8 with
    cp437.  For archives written on Chinese systems those bytes are really
    GB2312/GBK/GB18030, so the names are turned back into bytes and decoded
    again with GB18030 (a superset of the other two).
    """

    # General-purpose flag bit 11: the member name is stored as UTF-8.
    _UTF8_FLAG = 0x800

    def __init__(self, filename):
        """Open *filename* for reading and rebuild the name lookup table.

        Args:
            filename: Path of the archive, or a seekable binary file object.
        """
        super().__init__(filename, mode='r')
        # The lookup table is keyed by name, so it has to be rebuilt after
        # the names are rewritten.
        self.NameToInfo = {}
        for zinfo in self.filelist:
            # Names flagged as UTF-8 were already decoded correctly;
            # pushing them through cp437 would raise UnicodeEncodeError.
            if not zinfo.flag_bits & self._UTF8_FLAG:
                try:
                    zinfo.filename = (
                        zinfo.filename.encode('cp437').decode('gb18030'))
                except (UnicodeEncodeError, UnicodeDecodeError):
                    # Not GB18030 after all: keep the name zipfile produced
                    # rather than refusing to open the whole archive.
                    pass
            self.NameToInfo[zinfo.filename] = zinfo

    @staticmethod
    def print_bold(text):
        """Print *text* wrapped in the ANSI bold escape sequences."""
        bold = '\033[1m'
        endc = '\033[0m'
        print(bold + text + endc)

    def pprintdir(self):
        """Print a table of contents (size, modification time, name).

        The table is followed by a summary line with the total
        uncompressed size and the number of members.
        """
        # self.filename is None when the archive was opened from a file
        # object that has no name.
        self.print_bold('Archive: ' + os.path.basename(self.filename or ''))
        if self.comment:
            # Listing must not fail on a few undecodable comment bytes.
            self.print_bold(
                'Comment: ' + self.comment.decode('gb18030', errors='replace'))
        print('{:^10} {:^19} {}'.format('Size', 'Modified', 'File Name'))
        print('{:=^10} {:=^19} {:=<11}'.format('', '', ''))
        size_sum = 0
        for zinfo in self.filelist:
            try:
                filetime = '{:%Y-%m-%d %H:%M:%S}'.format(
                    datetime.datetime(*zinfo.date_time))
            except ValueError:
                # A zeroed DOS timestamp yields month/day 0, which datetime
                # rejects; show the raw fields instead of crashing.
                filetime = '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}'.format(
                    *zinfo.date_time)
            print('{:>10} {} {}'.format(
                zinfo.file_size, filetime, zinfo.filename))
            size_sum += zinfo.file_size
        file_sum = len(self.filelist)
        print('{:-^10} {:^19} {:-^11}'.format('', '', ''))
        print('{:>10} {:^19} {}'.format(str(size_sum), '',
                                        str(file_sum) + ' files'))
def cenc(name):
    """Return *name* encoded to bytes, or None when there is nothing to encode.

    Both ``None`` and the empty string give ``None``.
    """
    if name is None:
        return None
    encoded = name.encode()
    if not encoded:
        return None
    return encoded
class MyParser(argparse.ArgumentParser):
    """Argument parser for the command-line interface.

    Accepts one or more archive paths plus the ``--list``, ``--outdir``
    and ``--password`` options.
    """

    def __init__(self, prog=None):
        """Create the parser and register every supported argument."""
        super().__init__(
            prog=prog,
            description='Extract files from zipfiles using encoding GBK')
        self.add_argument('zipfile', nargs='+')
        self.add_argument('-l', '--list', dest='islist', action='store_true',
                          help='list files in zipfiles')
        # The remaining options each take one value and differ only in
        # their names and help text.
        value_options = (
            ('-o', '--outdir', 'outdir', 'set output directory'),
            ('-p', '--password', 'password', 'set password'),
        )
        for short_flag, long_flag, target, help_text in value_options:
            self.add_argument(short_flag, long_flag, dest=target,
                              help=help_text)
def main():
    """Parse the command line, then list or extract every given archive."""
    args = MyParser().parse_args()
    for archive in args.zipfile:
        with GBKZipFile(archive) as zfp:
            if args.islist:
                if args.password:
                    zfp.setpassword(cenc(args.password))
                zfp.pprintdir()
            else:
                zfp.extractall(path=args.outdir, pwd=cenc(args.password))
if __name__ == '__main__':
main()
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# unzip_gbk.py
"""
解决用 gbk 编码压缩的 zip 文件在 utf-8 环境下解压产生的中文文件名乱码问题
"""
from __future__ import print_function
import os
import sys
import getopt
from zipfile import ZipFile
from textwrap import dedent
IFLIST = False
def usage():
    """Print the command-line help text to standard output."""
    template = """\
        Usage: %s [options] zipfile1 [zipfile2 ...]
        Options:
        -h --help : display this help
        -l --list : list files
        -o --outdir : set output directory
        -p --password : set password"""
    # Fill in the program name first, then strip the literal's indentation.
    filled = template % sys.argv[0]
    print(dedent(filled))
def analyse(args=None):
    """Parse command-line arguments.

    Sets the module-level ``IFLIST`` flag to True when ``-l``/``--list``
    is given.

    Args:
        args: Argument list without the program name.  Defaults to
            ``sys.argv[1:]``, read when the function is called.

    Returns:
        A tuple ``(outdir, password, zipfiles)``: the output directory
        (the current directory unless ``-o`` is given), the password or
        None, and the list of archive paths.

    Exits with status 1 on an unknown option and with status 0 after
    printing the help for ``-h``/``--help``.
    """
    global IFLIST
    # A default of sys.argv[1:] in the signature would be frozen at
    # definition time; read it at call time instead.
    if args is None:
        args = sys.argv[1:]
    shortargs = "hlo:p:"
    longargs = ["help", "list", "outdir=", "password="]
    # os.getcwdu only exists on Python 2; on Python 3 os.getcwd already
    # returns text.
    outdir = getattr(os, 'getcwdu', os.getcwd)()
    password = None
    try:
        opts, zipfiles = getopt.getopt(args, shortargs, longargs)
    except getopt.GetoptError:
        print("Getopt error!")
        usage()
        sys.exit(1)
    for opt, value in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-l", "--list"):
            IFLIST = True
        elif opt in ("-o", "--outdir"):
            # Python 2 passes byte strings that need decoding; Python 3
            # passes text, which has no decode method.
            if isinstance(value, bytes):
                value = value.decode('utf8')
            outdir = value
        elif opt in ("-p", "--password"):
            password = value
    return outdir, password, zipfiles
def listzip(filename, password=None):
    """Print the member names of a zip archive, decoding legacy names as GBK.

    Args:
        filename: Path of the archive.
        password: Optional archive password, as bytes or text.
    """
    print("Archive: " + filename)
    with ZipFile(filename, 'r') as infile:
        if password:
            # Python 3's setpassword only accepts bytes.
            if not isinstance(password, bytes):
                password = password.encode('utf-8')
            infile.setpassword(password)
        for zinfo in infile.infolist():
            name = zinfo.filename
            if zinfo.flag_bits & 0x800:
                # Flagged as UTF-8: zipfile has already decoded it to text.
                utf8name = name
            else:
                # Python 2 leaves the raw bytes alone; Python 3 decodes
                # them as cp437, which has to be undone first.
                if not isinstance(name, bytes):
                    name = name.encode('cp437')
                utf8name = name.decode('gbk')
            print(utf8name)
def unzip(filename, outdir='', password=None):
    """Extract a zip archive, decoding legacy member names as GBK.

    Files that already exist are left untouched.  Members whose path
    would end up outside the output directory are skipped.

    Args:
        filename: Path of the archive.
        outdir: Directory to extract into; an empty string means the
            current directory.
        password: Optional archive password, as bytes or text.
    """
    print("Unziping " + filename)
    # Python 3's zipfile only accepts the password as bytes.
    if password and not isinstance(password, bytes):
        password = password.encode('utf-8')
    # abspath('') is the current directory, so the default outdir works
    # instead of ending in os.makedirs('').
    root = os.path.abspath(outdir)
    root_prefix = os.path.join(root, '')
    with ZipFile(filename, "r") as infile:
        if password:
            infile.setpassword(password)
        for zinfo in infile.infolist():
            name = zinfo.filename
            if zinfo.flag_bits & 0x800:
                # Flagged as UTF-8: zipfile has already decoded it to text.
                utf8name = name
            else:
                # Python 2 leaves the raw bytes alone; Python 3 decodes
                # them as cp437, which has to be undone first.
                if not isinstance(name, bytes):
                    name = name.encode('cp437')
                utf8name = name.decode('gbk')
            print("Extracting " + utf8name)
            targetname = os.path.normpath(os.path.join(root, utf8name))
            # Archive contents are untrusted: refuse absolute names and
            # '..' components that escape the output directory.
            if not (targetname + os.sep).startswith(root_prefix):
                print("Skipping unsafe path " + utf8name)
                continue
            if utf8name.endswith('/'):
                # Directory entry: nothing to write besides the directory.
                if not os.path.isdir(targetname):
                    os.makedirs(targetname)
                continue
            pathname = os.path.dirname(targetname)
            if not os.path.isdir(pathname):
                os.makedirs(pathname)
            if not os.path.exists(targetname):
                # Member data is bytes; text mode would corrupt it on
                # Windows and is rejected outright on Python 3.
                with open(targetname, 'wb') as myfile:
                    myfile.write(infile.read(zinfo))
def main():
    """Entry point: list or extract every archive named on the command line.

    Prints the help and exits when no archive is given.
    """
    outdir, password, zipfiles = analyse()
    if not zipfiles:
        print("No file to unzip.")
        usage()
        sys.exit()
    for archive in zipfiles:
        if IFLIST:
            listzip(archive, password)
        else:
            unzip(archive, outdir, password)
    sys.exit()
if __name__ == "__main__":
main()
@earthGavinLee
Copy link

压缩包解压的时候提示
Traceback (most recent call last):
File "./unzip-chn-1.py", line 91, in
main()
File "./unzip-chn-1.py", line 87, in main
zfp.extractall(path=args.outdir, pwd=cenc(args.password))
File "/usr/lib/python3.2/zipfile.py", line 1024, in extractall
self.extract(zipinfo, path, pwd)
File "/usr/lib/python3.2/zipfile.py", line 1012, in extract
return self._extract_member(member, path, pwd)
File "/usr/lib/python3.2/zipfile.py", line 1057, in _extract_member
shutil.copyfileobj(source, target)
File "/usr/lib/python3.2/shutil.py", line 65, in copyfileobj
buf = fsrc.read(length)
File "/usr/lib/python3.2/zipfile.py", line 585, in read
data = self.read1(n - len(buf))
File "/usr/lib/python3.2/zipfile.py", line 625, in read1
self._update_crc(data, eof=(self._compress_left==0))
File "/usr/lib/python3.2/zipfile.py", line 600, in _update_crc
raise BadZipFile("Bad CRC-32 for file %r" % self.name)
zipfile.BadZipFile: Bad CRC-32 for file '盒子/box.zip'

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment