Skip to content

Instantly share code, notes, and snippets.

@absindx
Created May 4, 2024 15:46
Show Gist options
  • Save absindx/272e2ac0f782b3dc521f4ce23b80da28 to your computer and use it in GitHub Desktop.
Save absindx/272e2ac0f782b3dc521f4ce23b80da28 to your computer and use it in GitHub Desktop.
Extract a Zip file by specifying file name encoding.
#--------------------------------------------------
# extract_zip_filename_encoding.py
# Extract a Zip file by specifying file name encoding.
#--------------------------------------------------
import argparse
import datetime
import glob
import os
import re
import sys
import zipfile
from typing import Callable
#--------------------------------------------------
# Encoding the stored file names are assumed to have been decoded with.
# decode_text() re-encodes a name with it to get back the raw bytes.
# Replaced by the -i/--in option when this file is run as a script.
interpreted_filename_encoding = 'cp437'
# Encoding used by decode_text() to decode those raw bytes into the final name.
# Replaced by the -o/--out option when this file is run as a script.
extract_filename_encoding = 'utf-8'
#--------------------------------------------------
def decode_text(text: str) -> str | None:
try:
decoded_text = text.encode(interpreted_filename_encoding).decode(extract_filename_encoding)
return decoded_text
except:
return None
def replace_directory_separator(path: str) -> str:
    """Return *path* with the platform separator replaced by the Zip one.

    Zip archives use '/' between path components; on platforms whose
    ``os.sep`` already is '/', the path is returned unchanged.
    """
    zip_separator = '/'
    if os.sep == zip_separator:
        return path
    return zip_separator.join(path.split(os.sep))
def extract_zip_keep_timestamp(zip: zipfile.ZipFile, info: zipfile.ZipInfo, base_path: str = '') -> bool:
    """Extract one archive member and restore its stored modification time.

    Args:
        zip: Open archive that contains the member.
        info: Member to extract; ``info.filename`` decides the output path.
        base_path: Directory to extract into.

    Returns:
        ``True`` when the member was extracted and its timestamp was set,
        ``False`` when extraction or setting the timestamp failed.
    """
    try:
        # extract file
        extracted_path = zip.extract(info, base_path)
        # ZipInfo.date_time is (year, month, day, hour, minute, second);
        # all six fields are used so the seconds are not lost.
        modified = datetime.datetime(*info.date_time[0:6])
        epoch_time = modified.timestamp()
        # set access and modification time
        os.utime(extracted_path, (epoch_time, epoch_time))
    except Exception:
        # Best effort: report the failure to the caller instead of raising,
        # but let KeyboardInterrupt / SystemExit propagate.
        return False
    return True
def extract_zip(file: str, filename_converter: Callable[[str, str], str] | None = None) -> bool:
    """Extract every member of a Zip file next to it, fixing member names.

    Member names are re-decoded with ``decode_text`` unless the entry is
    flagged as UTF-8, in which case the stored name is already correct.
    Members whose name cannot be converted are skipped with a warning.

    Args:
        file: Path of the Zip file; members are extracted into its directory.
        filename_converter: Optional callback ``(zip_path, member_name)``
            returning the name to extract the member as.

    Returns:
        ``True`` when the archive was read and every non-skipped member was
        extracted, ``False`` when the archive could not be processed or any
        member failed to extract.
    """
    # General purpose bit 11: the stored file name is UTF-8.
    UTF8_FILENAME_FLAG = 0x800
    try:
        base_path = os.path.dirname(file)
        directory_name = None
        succeeded = True
        with zipfile.ZipFile(file) as archive:
            for compressed_file in archive.infolist():
                # get file name
                if compressed_file.flag_bits & UTF8_FILENAME_FLAG:
                    # zipfile already decoded this name as UTF-8; converting
                    # it again would corrupt or reject non-ASCII names.
                    filename = compressed_file.orig_filename
                else:
                    filename = decode_text(compressed_file.orig_filename)
                if not filename:
                    print(f'[WARNING] Failed to convert file name. skipped. ("{compressed_file.orig_filename}")')
                    continue
                filename = replace_directory_separator(filename)
                # notify file name converter
                if filename_converter:
                    filename = filename_converter(file, filename)
                # output directory or file name
                output_directory = os.path.dirname(filename)
                if len(output_directory) > 0:
                    # report a directory only once while consecutive members share it
                    if directory_name != output_directory:
                        directory_name = output_directory
                        print(f'[INFO] Extract directory name is "{directory_name}".')
                else:
                    print(f'[INFO] Extract file name is "{filename}".')
                # extract
                print(f'[INFO] Extract file... "{filename}".')
                compressed_file.filename = filename
                if not extract_zip_keep_timestamp(archive, compressed_file, base_path):
                    # keep going with the other members, but report the failure
                    print(f'[ERROR] Failed to extract file. ("{filename}")')
                    succeeded = False
        return succeeded
    except Exception as error:
        # e.g. missing file, not a Zip file, or a failing filename_converter
        print(f'[ERROR] {type(error).__name__}: {error}')
        return False
#--------------------------------------------------
def extract_list(files: list[str]) -> bool:
    """Extract each Zip file in *files*, printing a status line per file.

    Returns:
        ``True`` when every file was extracted successfully (also for an
        empty list), ``False`` when at least one extraction failed.
    """
    separator = '-' * 50
    all_succeeded = True
    for zip_path in files:
        print(separator)
        print(f'[INFO] Extract zip file... ("{zip_path}")')
        if extract_zip(zip_path):
            print('[INFO] Zip file was successfully extracted.')
        else:
            print('[ERROR] Failed to extract the file.')
            all_succeeded = False
    return all_succeeded
def extract_all() -> bool:
    """Extract every ``*.zip`` file found in the current directory.

    A warning is printed when no Zip file is found; the result is whatever
    ``extract_list`` returns for the files that were found.
    """
    zip_paths = glob.glob('*.zip')
    if not zip_paths:
        print('[WARNING] Zip file does not exist.')
    return extract_list(zip_paths)
#--------------------------------------------------
if __name__ == "__main__":
    # Local import: only the command-line entry point needs it.
    import codecs

    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='*', help='Zip file to extract. (multiple can be specified. If not specified, all ZIP files in the current directory will be extracted.)')
    parser.add_argument('-i', '--in', default='cp437', help='Character encoding stored in zip file. (default="cp437")')
    parser.add_argument('-o', '--out', required=True, help='Character encoding for extracting zip file.')
    args = parser.parse_args()

    # 'in' is a Python keyword, so the value cannot be read as args.in
    input_encoding = getattr(args, 'in')
    output_encoding = args.out

    # Reject unknown encoding names up front. Otherwise every file name
    # conversion fails, all members are skipped and the exit code is still 0.
    for option, encoding in (('-i/--in', input_encoding), ('-o/--out', output_encoding)):
        try:
            codecs.lookup(encoding)
        except LookupError:
            parser.error(f'unknown character encoding for {option}: "{encoding}"')

    # Module-level settings read by decode_text()
    interpreted_filename_encoding = input_encoding
    extract_filename_encoding = output_encoding

    if len(args.file) > 0:
        result = extract_list(args.file)
    else:
        print('[INFO] Extract all ZIP files in the current directory.')
        result = extract_all()

    # exit status: 0 on success, 1 when any extraction failed
    exitcode = 0 if result else 1
    sys.exit(exitcode)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment