Skip to content

Instantly share code, notes, and snippets.

@whinette
Created January 2, 2019 11:23
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save whinette/a46ccb980d93ee8189c2c6629a7e2e4c to your computer and use it in GitHub Desktop.
Save whinette/a46ccb980d93ee8189c2c6629a7e2e4c to your computer and use it in GitHub Desktop.
Tailored script to clean up the mess from pixiv.
# -*- coding: utf-8 -*-
import os, sys
from stat import ST_MTIME
import distutils
from distutils import dir_util
import shutil
import logging
from datetime import datetime
# Module-wide logger shared by every function below. getLogger() without a
# name returns the root logger; its handlers are attached in the __main__
# block at the bottom of the file.
g_logger = logging.getLogger()
# Let INFO-level progress messages through (the root logger defaults to WARNING).
g_logger.setLevel(logging.INFO)
def merge_subfolders(subfolders):
    """Merge every folder in *subfolders* into the last one, deleting the sources.

    The last entry of *subfolders* is the destination. The caller in this
    file sorts the list by modification time, so the destination is the most
    recently modified folder. Iteration stops at the first entry equal to
    the destination. An empty list is a no-op.

    Per source folder one of two strategies is used:

    * Source and destination names are equal when compared
      case-insensitively: the source is copied into a private temporary
      staging directory and from there into the destination, both with
      ``copy_tree(update=1)`` (a destination file is only replaced by a
      newer one). The staging directory is always removed afterwards.
    * Otherwise every file is moved to the corresponding place under the
      destination, replacing any file that already exists there.

    In both cases the source folder is removed once its content has been
    transferred. Any exception raised while copying or moving propagates
    before the source folder is deleted.
    """
    if not subfolders:
        return

    # Local import: only the same-name branch needs it.
    import tempfile

    target = subfolders[-1]
    for source in subfolders:
        if source == target:
            break
        g_logger.info('{} -> {}'.format(source, target))
        if source.lower() == target.lower():
            g_logger.info('Same name! {} | {}'.format(source, target))
            # NOTE(review): the two-step copy through a staging directory is
            # kept from the original; presumably it works around
            # case-insensitive filesystems -- confirm.
            # A fresh mkdtemp() directory replaces the former fixed
            # '/tmp/<source>' path, which was never cleaned up and could
            # leak files from an earlier run into the destination.
            staging = tempfile.mkdtemp(prefix='merge_subfolders_')
            try:
                distutils.dir_util.copy_tree(source, staging, verbose=1, update=1)
                distutils.dir_util.copy_tree(staging, target, verbose=1, update=1)
            finally:
                shutil.rmtree(staging, ignore_errors=True)
            shutil.rmtree(source)
        else:
            for src_dir, _dirs, files in os.walk(source):
                # os.walk() yields paths that start with `source`; swap that
                # prefix for the destination to mirror the tree layout.
                dst_dir = target + src_dir[len(source):]
                if not os.path.exists(dst_dir):
                    os.makedirs(dst_dir)
                for file_ in files:
                    src_file = os.path.join(src_dir, file_)
                    dst_file = os.path.join(dst_dir, file_)
                    # shutil.move() refuses to overwrite, so drop the
                    # existing destination file first.
                    if os.path.exists(dst_file):
                        os.remove(dst_file)
                    shutil.move(src_file, dst_dir)
            # All files are gone; remove the now-empty directory skeleton.
            # (`source` can never be `target` here because of the break above.)
            shutil.rmtree(source)
def merge_dupe_users_subfolder(home):
    """Merge the subfolders found directly inside each directory of *home*.

    *home* is a list of directory paths. The entry ``'.duplicates'`` is
    skipped. For every other directory the immediate subdirectories are
    collected; when there is more than one, they are sorted by modification
    time (oldest first) and passed to ``merge_subfolders``, which folds them
    into the last (newest) one.

    Progress is logged as ``i/total``, where ``total`` counts only the
    directories that are actually processed.
    """
    g_logger.info('---=== Merging subfolders ===---')
    # Count what is really iterated instead of assuming '.duplicates' is
    # always present (the former `len(home) - 1` was off by one otherwise).
    user_dirs = [d for d in home if d != '.duplicates']
    total = len(user_dirs)
    for i, d in enumerate(user_dirs, 1):
        candidates = (os.path.join(d, x) for x in os.listdir(d))
        subfolders = [p for p in candidates if os.path.isdir(p)]
        if len(subfolders) > 1:
            g_logger.info('{}/{} merging {}'.format(i, total, d))
            # Oldest first, so the newest folder ends up last and becomes
            # the merge destination.
            subfolders.sort(key=os.path.getmtime)
            merge_subfolders(subfolders)
            g_logger.info('done!')
    g_logger.info('---=== Subfolders done! ===---')
def merge_dupe_users(home):
    """Merge consecutive directories of *home* that share the same leading token.

    *home* is a list of directory paths, expected to be ordered so that
    directories with the same leading token (the text before the first
    space; presumably the user id -- confirm against the naming scheme) are
    adjacent. When two neighbours share that token, the one with the older
    modification time is copied into the newer one with
    ``copy_tree(update=1)`` and then deleted.

    The surviving directory stays the comparison partner for the next entry,
    so runs of three or more duplicates are merged into a single directory.

    Returns 1 if at least one merge happened, otherwise 0.
    """
    g_logger.info('---=== Merging dupes users ===---')
    # Every entry is visited, so the progress denominator is simply len(home).
    total = len(home)
    ret = 0
    stamp = '%Y-%m-%d %H:%M:%S'
    # (path, mtime, leading token) of the directory the next entry is
    # compared against; None until the first entry has been seen.
    kept = None
    for i, d in enumerate(home, 1):
        current = (d, os.stat(d)[ST_MTIME], d.split(' ')[0])
        if kept is None or current[2] != kept[2]:
            kept = current
            continue

        ret = 1
        g_logger.info('{}/{} found match with {} ({}) and {} ({})'.format(
            i, total,
            current[0], datetime.utcfromtimestamp(current[1]).strftime(stamp),
            kept[0], datetime.utcfromtimestamp(kept[1]).strftime(stamp)))
        if current[1] > kept[1]:
            survivor, obsolete = current, kept
        else:
            survivor, obsolete = kept, current
        g_logger.info('keeping {}'.format(survivor[0]))
        distutils.dir_util.copy_tree(obsolete[0], survivor[0], verbose=1, update=1)
        shutil.rmtree(obsolete[0])
        g_logger.info('done!')
        # Track the directory that still exists. The original always
        # advanced to the current entry, even right after deleting it, which
        # broke the merge of a third directory with the same token.
        kept = survivor
    g_logger.info('---=== Dupe users done! ===---')
    return ret
if __name__ == '__main__':
    # Log to a per-day file in the current directory and to stdout, using
    # the same format for both.
    formatter = logging.Formatter('[%(asctime)s] - %(message)s')
    fh = logging.FileHandler('cleanup_{:%Y-%m-%d}.log'.format(datetime.now()))
    ch = logging.StreamHandler(sys.stdout)
    for handler in (fh, ch):
        handler.setFormatter(formatter)
        g_logger.addHandler(handler)
    g_logger.info('-' * 80)

    def _leading_number(name):
        """Return the integer that starts *name*, or None if there is none."""
        try:
            return int(name.split()[0])
        except (IndexError, ValueError):
            return None

    def _list_user_dirs():
        """Return the numbered directories of the cwd, sorted by that number.

        Directories whose name does not start with an integer (for example
        '.duplicates') are logged and left out instead of aborting the run
        with a ValueError while sorting.
        """
        numbered = []
        for name in os.listdir(os.getcwd()):
            if not os.path.isdir(name):
                continue
            number = _leading_number(name)
            if number is None:
                g_logger.info('skipping {} (no leading number)'.format(name))
                continue
            numbered.append((number, name))
        # Stable sort on the number only, like the original key function.
        numbered.sort(key=lambda pair: pair[0])
        return [name for _number, name in numbered]

    home = _list_user_dirs()
    changed = merge_dupe_users(home)
    if changed:
        # Directories were deleted by the merge; list again before the
        # subfolder pass.
        home = _list_user_dirs()
    merge_dupe_users_subfolder(home)
@whinette
Copy link
Author

whinette commented Jan 2, 2019

Written for Python 2.

filenameformat = %artist% (%member_id%)/%urlFilename% - %title%
filenamemangaformat = %artist% (%member_id%)/%urlFilename% - %title%
filenameinfoformat = %artist% (%member_id%)/%urlFilename% - %title%

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment