Created
May 4, 2024 15:46
-
-
Save absindx/272e2ac0f782b3dc521f4ce23b80da28 to your computer and use it in GitHub Desktop.
Extract a Zip file by specifying file name encoding.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#--------------------------------------------------
# extract_zip_filename_encoding.py
# Extract a Zip file by specifying file name encoding.
#--------------------------------------------------
import argparse
import codecs
import datetime
import glob
import os
import re
import sys
import zipfile
from typing import Callable
#--------------------------------------------------
# File name encoding settings read by decode_text().
# The command-line entry point overwrites both from its arguments.
# Encoding used to turn an already-decoded member name back into raw bytes.
interpreted_filename_encoding = "cp437"
# Encoding used to decode those raw bytes into the final file name.
extract_filename_encoding = "utf-8"
#--------------------------------------------------
def decode_text(text: str) -> str | None: | |
try: | |
decoded_text = text.encode(interpreted_filename_encoding).decode(extract_filename_encoding) | |
return decoded_text | |
except: | |
return None | |
def replace_directory_separator(path: str) -> str:
    """Return ``path`` with the OS path separator replaced by the Zip separator ``/``."""
    zip_separator = '/'
    # Nothing to translate when the platform separator already is '/'.
    if os.sep == zip_separator:
        return path
    return path.replace(os.sep, zip_separator)
def extract_zip_keep_timestamp(zip: zipfile.ZipFile, info: zipfile.ZipInfo, base_path: str = '') -> bool:
    """Extract one archive member and apply the timestamp stored in the archive.

    Args:
        zip: Open archive that contains ``info``. (The name shadows the
            builtin but is kept because it is part of the signature.)
        info: Member to extract.
        base_path: Directory to extract into; passed to ``ZipFile.extract``.

    Returns:
        ``True`` when the member was extracted and its timestamp was applied,
        ``False`` when either step failed (the reason is printed).
    """
    # extract file
    try:
        extracted_path = zip.extract(info, base_path)
    except Exception as error:
        # Best effort: report and let the caller continue with other members.
        # KeyboardInterrupt/SystemExit are not caught here.
        print(f'[ERROR] Failed to extract "{info.filename}". ({error})')
        return False

    # set timestamp
    try:
        # ZipInfo.date_time is (year, month, day, hour, minute, second);
        # all six fields are used so the seconds are not dropped.
        modified = datetime.datetime(*info.date_time[:6])
        epoch_time = modified.timestamp()
        os.utime(extracted_path, (epoch_time, epoch_time))
    except (OSError, OverflowError, ValueError) as error:
        print(f'[WARNING] Failed to set timestamp of "{extracted_path}". ({error})')
        return False
    return True
def extract_zip(file: str, filename_converter: Callable[[str, str], str] | None = None) -> bool: | |
try: | |
base_path = os.path.dirname(file) | |
directory_name = None | |
with zipfile.ZipFile(file) as zip: | |
for compressed_file in zip.infolist(): | |
# get file name | |
filename = decode_text(compressed_file.orig_filename) | |
if not filename: | |
print(f'[WARNING] Failed to convert file name. skipped. ("{compressed_file.orig_filename}")') | |
continue | |
filename = replace_directory_separator(filename) | |
# notify file name converter | |
if filename_converter: | |
filename = filename_converter(file, filename) | |
# output directory or file name | |
output_directory = os.path.dirname(filename) | |
if len(output_directory) > 0: | |
if directory_name != output_directory: | |
directory_name = output_directory | |
print(f'[INFO] Extract directory name is "{directory_name}".') | |
else: | |
print(f'[INFO] Extract file name is "{filename}".') | |
# extract | |
print(f'[INFO] Extract file... "{filename}".') | |
compressed_file.filename = filename | |
extract_zip_keep_timestamp(zip, compressed_file, base_path) | |
return True | |
except: | |
return False | |
#--------------------------------------------------
def extract_list(files: list[str]) -> bool:
    """Extract each Zip file in ``files`` and report the outcome per file.

    Returns:
        ``True`` when every file was extracted successfully (also for an
        empty list), ``False`` when at least one extraction failed.
    """
    all_succeeded = True
    separator = '-' * 50
    for zip_path in files:
        print(separator)
        print(f'[INFO] Extract zip file... ("{zip_path}")')
        if extract_zip(zip_path):
            print('[INFO] Zip file was successfully extracted.')
        else:
            # Keep going with the remaining files; only the overall result flips.
            all_succeeded = False
            print('[ERROR] Failed to extract the file.')
    return all_succeeded
def extract_all() -> bool:
    """Extract every file matching ``*.zip`` in the current directory.

    Returns:
        The result of ``extract_list`` for the files found.
    """
    zip_paths = glob.glob('*.zip')
    if not zip_paths:
        print('[WARNING] Zip file does not exist.')
    return extract_list(zip_paths)
#--------------------------------------------------
if __name__ == "__main__":
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('file', nargs='*', help='Zip file to extract. (multiple can be specified. If not specified, all ZIP files in the current directory will be extracted.)')
    # "in" is a Python keyword, so the value is stored under an explicit dest.
    parser.add_argument('-i', '--in', dest='in_encoding', default='cp437', help='Character encoding stored in zip file. (default="cp437")')
    parser.add_argument('-o', '--out', dest='out_encoding', required=True, help='Character encoding for extracting zip file.')
    args = parser.parse_args()

    # Reject unknown encoding names up front; otherwise every file name would
    # fail to convert and be skipped with only a warning.
    for encoding_name in (args.in_encoding, args.out_encoding):
        try:
            codecs.lookup(encoding_name)
        except LookupError:
            parser.error(f'unknown encoding: "{encoding_name}"')

    # Module-level settings read by decode_text().
    interpreted_filename_encoding = args.in_encoding
    extract_filename_encoding = args.out_encoding

    if args.file:
        result = extract_list(args.file)
    else:
        print('[INFO] Extract all ZIP files in the current directory.')
        result = extract_all()

    # Exit status: 0 on success, 1 when any extraction failed.
    sys.exit(0 if result else 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment