Created
May 27, 2024 20:43
-
-
Save JellyTitan/fe1e5e9aef44d3fb92704928eb7d838b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import shutil
import tempfile
import zipfile
# Walks a zip archive, including any zip archives nested inside it, and
# copies every extracted file into a single flat output folder.
# Intended to simplify the translation ETL process.
# Set zip_file_path to the source .zip file to be unpacked and
# destination_folder_path to the folder that should receive the files.
zip_file_path = './test_data_set.zip'
destination_folder_path = './test_output'
def unzip_and_copy_files(zip_file_path, destination_folder_path):
    """Flatten a (possibly nested) zip archive into one output folder.

    The archive at ``zip_file_path`` is extracted into a private temporary
    directory. Every extracted member whose name ends in ``.zip`` (compared
    case-insensitively) is itself extracted into that same directory, and so
    on recursively. Afterwards every regular file that is not a zip archive
    is copied directly into ``destination_folder_path``; the directory
    structure inside the archives is not preserved.

    Args:
        zip_file_path: Path of the top-level ``.zip`` file to unpack.
        destination_folder_path: Folder that receives the extracted files.
            It is created if it does not exist.

    Raises:
        FileNotFoundError: If ``zip_file_path`` does not exist.
        zipfile.BadZipFile: If the archive, or any nested ``.zip`` member,
            is not a valid zip file.

    Note:
        Because the output is flat, files that share a base name overwrite
        each other in the destination folder.
    """
    os.makedirs(destination_folder_path, exist_ok=True)

    def extract_zip(file_path, extract_to):
        """Extract ``file_path`` into ``extract_to`` and recurse into nested zips."""
        nested_archives = []
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            for member in zip_ref.infolist():
                # extract() returns the sanitised path it actually wrote,
                # which can differ from the raw member name (e.g. leading
                # slashes or ".." components are stripped).
                extracted_path = zip_ref.extract(member, extract_to)
                if not member.is_dir() and extracted_path.lower().endswith('.zip'):
                    nested_archives.append(extracted_path)
        # Recurse only after the parent archive is closed so that deep
        # nesting does not keep a chain of file handles open.
        for nested_path in nested_archives:
            extract_zip(nested_path, extract_to)

    # A unique temporary directory avoids picking up stale files from (or
    # deleting) an unrelated "temp_folder" in the working directory, and the
    # context manager removes it even when extraction or copying fails.
    with tempfile.TemporaryDirectory() as temp_folder:
        extract_zip(zip_file_path, temp_folder)

        # Copy all files (not directories) to the destination folder.
        for root, _dirs, files in os.walk(temp_folder):
            for file in files:
                if not file.lower().endswith('.zip'):  # Skip zip files
                    shutil.copy(os.path.join(root, file), destination_folder_path)
# Usage: unpack the configured archive into the configured output folder.
unzip_and_copy_files(zip_file_path, destination_folder_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment