Skip to content

Instantly share code, notes, and snippets.

@chapmanjacobd
Created June 18, 2023 19:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chapmanjacobd/7f6c4bac0ebbd397be9d14306da34f92 to your computer and use it in GitHub Desktop.
Save chapmanjacobd/7f6c4bac0ebbd397be9d14306da34f92 to your computer and use it in GitHub Desktop.
def rebin_folders(paths, max_files_per_folder=16000):
    """Plan how to split over-full folders into numbered subfolders.

    Paths are grouped by their parent folder. A folder holding more than
    ``max_files_per_folder`` unique paths is split into consecutive bins of at
    most that many paths; each bin becomes a subfolder named ``1``, ``2``, ...
    (zero-padded so every bin name of that folder has the same width).

    Nothing is moved on disk: the function only computes the plan.

    Args:
        paths: Iterable of hashable path-like values (``str`` or ``Path``).
            It is consumed exactly once, so generators are fine. Duplicate
            entries are ignored.
        max_files_per_folder: Largest number of paths allowed to stay directly
            inside one folder. Must be at least 1.

    Returns:
        A ``(untouched, rebinned_tuples)`` pair. ``untouched`` is the set of
        input paths whose folder is within the limit. ``rebinned_tuples`` is a
        list of ``(original_path, new_path)`` tuples, where ``new_path`` is a
        ``str`` of the form ``<parent>/<bin>/<name>``.

    Raises:
        ValueError: If ``max_files_per_folder`` is smaller than 1.
    """
    if max_files_per_folder < 1:
        raise ValueError("max_files_per_folder must be at least 1")

    # Single pass over the de-duplicated input. dict.fromkeys keeps first-seen
    # order, so bin assignment follows the caller's order instead of the
    # arbitrary iteration order of a set.
    by_parent = {}
    for p in dict.fromkeys(paths):
        by_parent.setdefault(Path(p).parent, []).append(p)

    untouched = set()
    rebinned_tuples = []
    for parent, members in by_parent.items():
        if len(members) <= max_files_per_folder:
            untouched.update(members)
            continue

        # Bins are numbered per folder, so each over-full folder gets the
        # minimum number of subfolders regardless of what other folders hold.
        n_bins = -(-len(members) // max_files_per_folder)  # ceiling division
        width = len(str(n_bins))  # widest label is the last bin number
        starts = range(0, len(members), max_files_per_folder)
        for bin_no, start in enumerate(starts, start=1):
            subfolder = parent / str(bin_no).zfill(width)
            rebinned_tuples.extend(
                (p, str(subfolder / Path(p).name))
                for p in members[start:start + max_files_per_folder]
            )

    return untouched, rebinned_tuples
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment