Skip to content

Instantly share code, notes, and snippets.

@gilinachum
Created December 29, 2018 16:17
Show Gist options
  • Save gilinachum/a07407a3cb2b3ec7a642354f1f109f89 to your computer and use it in GitHub Desktop.
Save gilinachum/a07407a3cb2b3ec7a642354f1f109f89 to your computer and use it in GitHub Desktop.
Splits a folder containing millions of files into mini folders by creating hard links.
#!python3
import os
import traceback
import datetime
# Directory whose entries are scanned and hard-linked; written without a
# trailing slash because the loop below appends '/' itself.
source_base_path = '/tmp/sourcedir'
# Prefix for the mini folders. The folder number is appended directly to this
# string, so the trailing slash is what places them inside the directory.
target_base_path = '/tmp/targetdir/'
# Number of the mini folder currently being filled; it is incremented before
# each folder is created, so the first folder is numbered 1.
current_mini_folder = 0
# Count of entries hard-linked successfully so far.
files_counter = 0
# A new mini folder is started whenever files_counter is a multiple of this.
files_per_mini_folder = 10_000
# Count of loop iterations that raised an exception.
errors = 0
def create_mini_folder(index, base_path=None):
    """Create the mini folder numbered *index* and log its creation.

    The folder path is built by plain string concatenation of the base path
    and ``str(index)``, so the base path is expected to end with a path
    separator.

    Args:
        index: Number of the mini folder to create.
        base_path: Prefix the folder number is appended to. Defaults to the
            module-level ``target_base_path``.

    Raises:
        FileExistsError: If the mini folder itself already exists.
        OSError: If the folder cannot be created for any other reason.
    """
    if base_path is None:
        base_path = target_base_path
    new_folder = base_path + str(index)
    print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ' - Creating a new mini folder: ' + new_folder)
    # makedirs (rather than mkdir) also creates a missing base directory, so a
    # first run against a target that does not exist yet no longer fails.
    # exist_ok is left at its default so an existing mini folder still raises.
    os.makedirs(new_folder)
# Value of files_counter at which the most recent mini folder was created.
# Without it, a failed link while files_counter sits on a multiple of
# files_per_mini_folder would trigger a new (empty) mini folder on every
# following iteration, because the counter only advances on success.
_last_rollover_at = -1

# The with-statement closes the scandir iterator even if the loop exits early.
with os.scandir(source_base_path) as source_entries:
    for source_file in source_entries:
        try:
            if files_counter % files_per_mini_folder == 0 and files_counter != _last_rollover_at:
                current_mini_folder += 1
                create_mini_folder(current_mini_folder)
                # Recorded only after the folder was created, so a failed
                # creation is retried with the next folder number.
                _last_rollover_at = files_counter
            # DirEntry.name is the bare file name; no prefix stripping needed.
            target_file_path = target_base_path + str(current_mini_folder) + '/' + source_file.name
            os.link(source_file.path, target_file_path)
            print(str(files_counter) + ': Hard linking: ' + source_file.path + ' -> ' + target_file_path)
            files_counter += 1
        except Exception as e:
            # Best effort: report the failure, count it, and move on to the
            # next entry instead of aborting the whole run.
            print("type error: " + str(e))
            print(traceback.format_exc())
            errors += 1

print('Done! files_counter=' + str(files_counter) + ', current_mini_folder=' + str(current_mini_folder) + ', errors=' + str(errors))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment