@dfd · Created January 5, 2016 04:43
# The scenario for this script: a home server crashed. The server duplicated
# and balanced data across 4 disk drives, 3 of which still worked. The files
# from all three were copied to separate directories on a NAS, named
# HPSERVER1, HPSERVER2, and HPSERVER3. The directory structure on each disk
# mirrored the others, except that empty folders were missing.
# This script was run to move the files and folders from all three disk drives
# into one consolidated folder (HPSERVER1). I manually deleted the other two
# directories after checking the result.
import os
import shutil
from os.path import isdir, isfile, join, getsize
import logging

# Log everything to consolidate.log next to this script.
print(join(os.path.dirname(__file__), 'consolidate.log'))
logging.basicConfig(filename=join(os.path.dirname(__file__), 'consolidate.log'),
                    level=logging.DEBUG)

STEM = '/share/homes/admin/'
MAIN = 'HPSERVER1'
OTHERS = ('HPSERVER2', 'HPSERVER3')
MAIN_DIR = STEM + MAIN
OTHER_DIRS = [STEM + OTHER for OTHER in OTHERS]

def get_files(directory):
    """ Get all files from a directory (str). """
    return [f for f in os.listdir(directory) if isfile(join(directory, f))]


def get_dirs(directory):
    """ Get all directories from a directory (str). """
    return [f for f in os.listdir(directory) if isdir(join(directory, f))]

def get_size(directory, filename):
    """ Return the size of a file, or -1 if it cannot be read. """
    try:
        size = getsize(join(directory, filename))
    except OSError:
        size = -1
    return size

def consolidate_files(main_dir, other_dirs):
    """ Identify the unique files across directories, and ensure the largest
    version of each file is in the main directory. Then call itself with all
    the directories at the next level. Create any directories that don't
    exist before calling.
    main_dir: a string representing the directory we want to move files to
    other_dirs: a list with the main_dir's counterparts
    """
    # collect a list of all files at this level
    all_files = set([])
    for other in other_dirs + [main_dir]:
        all_files.update(get_files(other))
    logging.debug(all_files)
    # find the largest version in case some files were corrupted
    for f in all_files:
        largest_f = None
        largest_size = 0
        main_size = get_size(main_dir, f)
        if main_size > largest_size:
            largest_f = join(main_dir, f)
            largest_size = main_size
        for other in other_dirs:
            size = get_size(other, f)
            if size > largest_size:
                largest_f = join(other, f)
                largest_size = size
        # only replace the main_dir version if there's a bigger one elsewhere
        if largest_f and largest_size > main_size:
            logging.debug("moving " + largest_f + " to " + join(main_dir, f))
            shutil.move(largest_f, join(main_dir, f))
    # collect a list of all directories at this level
    main_folders = get_dirs(main_dir)
    all_folders = set(main_folders)
    for other in other_dirs:
        all_folders.update(get_dirs(other))
    logging.debug(all_folders)
    # make sure every folder exists in every tree, then recurse into it
    for folder in list(all_folders):
        if folder not in main_folders:
            logging.debug("making dir: " + join(main_dir, folder))
            os.mkdir(join(main_dir, folder))
        for other in other_dirs:
            if folder not in get_dirs(other):
                os.mkdir(join(other, folder))
        consolidate_files(join(main_dir, folder),
                          [join(other, folder) for other in other_dirs])


consolidate_files(MAIN_DIR, OTHER_DIRS)
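

# A minimal sketch (not part of the original gist) of how the result could be
# checked before deleting the HPSERVER2 and HPSERVER3 copies: compare file
# counts and total bytes per tree. tree_stats is a hypothetical helper that
# reuses get_size and the constants defined above.
def tree_stats(root):
    """ Return (file_count, total_bytes) for everything under root. """
    count, total = 0, 0
    for dirpath, _, filenames in os.walk(root):
        for name in filenames:
            count += 1
            total += max(get_size(dirpath, name), 0)  # ignore unreadable files
    return count, total

# Example check: after consolidation, HPSERVER1 should hold at least as many
# files and bytes as either of the other trees.
# for d in [MAIN_DIR] + OTHER_DIRS:
#     logging.debug("%s: %s", d, tree_stats(d))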