Skip to content

Instantly share code, notes, and snippets.

@nik312123
Created August 3, 2023 20:50
Show Gist options
  • Save nik312123/8a2ea1e0aeb14c6cad0d49fd5772220d to your computer and use it in GitHub Desktop.
Save nik312123/8a2ea1e0aeb14c6cad0d49fd5772220d to your computer and use it in GitHub Desktop.
Splits a folder into zip files of a certain MB size or lower. If a file is larger than the limit, then it will have its own zip file.
#!/usr/bin/env python3
# Copyright Nikunj Chawla 2023
import sys
import os
import re
from zipfile import ZipFile, ZIP_DEFLATED
from math import ceil
# Archives produced by this script are named zip_<digits>.zip; the pattern lets
# later code recognise them by file name.
zipfile_regex = re.compile(r"^zip_\d+\.zip$")

# Exactly two arguments are required: the directory to split and the size
# limit in megabytes.
if len(sys.argv) != 3:
    print("Usage: python split.py [pathToDir] [splitSizeMb]")
    sys.exit(1)

dir_path = sys.argv[1]
# os.walk ignores errors by default, so a mistyped path would otherwise make
# the script finish silently without doing anything.
if not os.path.isdir(dir_path):
    print("First argument must be a path to an existing directory")
    sys.exit(1)

try:
    # The limit is given in MB on the command line and kept in bytes.
    split_size = int(sys.argv[2]) * 1024 * 1024
except ValueError:
    print("Second argument must be integer")
    sys.exit(1)

# A limit of zero or less is meaningless as a size cap, and a zero limit
# breaks any size / limit division, so reject it before doing any work.
if split_size <= 0:
    print("Second argument must be a positive integer")
    sys.exit(1)

# Shared state used by the rest of the script.
num_digits = 0        # zero-padding width for the archive index
cur_file_size = 0     # combined size in bytes of the files in cur_files
cur_files = []        # paths waiting to be written to the next archive
zip_file_counter = 0  # index of the next archive to create
total_size = 0        # combined size in bytes of all files considered
def zip_files() -> None:
    """Write every path in ``cur_files`` to the next numbered archive.

    The archive is created inside ``dir_path`` as ``zip_<index>.zip``, where
    the index is ``zip_file_counter`` left-padded with zeros to ``num_digits``
    characters. Files are written with DEFLATE compression and removed from
    ``cur_files`` as they are written, so the list is empty afterwards. On
    completion a progress line is printed and ``zip_file_counter`` is
    incremented.

    If ``cur_files`` is empty nothing happens: no archive is created and the
    counter is left untouched, so no empty archives or gaps in the numbering
    are produced.
    """
    global zip_file_counter

    # Nothing pending: avoid creating an empty archive.
    if not cur_files:
        return

    label = str(zip_file_counter).zfill(num_digits)
    with ZipFile(os.path.join(dir_path, f"zip_{label}.zip"), "w") as zipf:
        # Popping drains the shared list in place (last path first), leaving
        # it ready for the next batch.
        while cur_files:
            zipf.write(cur_files.pop(), compress_type=ZIP_DEFLATED)
    print(f"Zip {label} completed")
    zip_file_counter += 1
# Plan all batches in a single walk before writing anything, so that the
# zero-padding width can be derived from the real number of archives rather
# than from a total-size estimate (greedy packing and oversized files can
# produce more archives than total_size / split_size suggests).
batches = []
for root, _, files in os.walk(dir_path):
    for file in files:
        # Skip archives left behind by this script so they are neither
        # counted towards the size nor packed into new archives.
        if zipfile_regex.match(file):
            continue
        full_path = os.path.join(root, file)
        file_size = os.path.getsize(full_path)
        total_size += file_size
        if batches and cur_file_size + file_size <= split_size:
            # The file still fits in the batch being filled.
            batches[-1].append(full_path)
            cur_file_size += file_size
        else:
            # Start a new batch. A file larger than the limit ends up alone
            # in its batch because the next file cannot fit beside it.
            batches.append([full_path])
            cur_file_size = file_size

# Highest archive index that will be used; its length is the padding width.
splits = max(0, len(batches) - 1)
num_digits = len(str(splits))

for batch in batches:
    # zip_files() consumes the shared cur_files list, so load one batch at a
    # time; batches are never empty, so no empty archive is created.
    cur_files.extend(batch)
    zip_files()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment