Last active
December 7, 2020 09:54
-
-
Save Zwackelmann/97c5f4797c7db83e54fbf21d799a7f30 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import os | |
from typing import Dict, Tuple, List | |
import sys | |
import shutil | |
from argparse import ArgumentParser | |
def group_file_names(d: str) -> Dict[str, List[Tuple[str, str]]]:
    """
    Groups the files in a given directory by their group names. It is assumed that the files match the pattern
    '{group-name}_{5-digit-sequence-nr}.{ext}' with a non-empty group name. Entries not matching the pattern and
    sub directories are ignored; a warning is printed to stderr for each ignored entry.

    :param d: path to directory
    :return: dict containing a key for each distinct group name. Each value will be a list with
        (5-digit-sequence-nr, ext)-pairs, sorted by the numeric value of the sequence number.
    """
    # `(.+)` demands a non-empty group name: an empty name cannot serve as a sub folder name later on.
    pattern = re.compile(r"(.+)_(\d{5})\.(.*)")
    seqs: Dict[str, List[Tuple[str, str]]] = {}
    for file in os.listdir(d):
        # fullmatch (instead of match) rejects names containing a line break; `.` does not match a newline,
        # so such names would otherwise match only up to the line break and yield a truncated extension.
        match = pattern.fullmatch(file)
        if not match:
            print(f"WARNING: ignoring file {file} since it does not comply to the pattern", file=sys.stderr)
            continue
        if os.path.isdir(os.path.join(d, file)):
            print(f"WARNING: ignoring {file} since it is a directory", file=sys.stderr)
            continue
        seq_name, seq_nr, ext = match.groups()
        seqs.setdefault(seq_name, []).append((seq_nr, ext))
    # sort each group by the sequence number
    return {k: sorted(v, key=lambda x: int(x[0])) for k, v in seqs.items()}
def group_mv(src: str, target: str, cut_seq_name=True) -> None:
    """
    Groups files in `src` folder that comply to the pattern '{group-name}_{5-digit-sequence-nr}.{ext}' by their group
    name. Then creates a sub folder in `target` folder for each distinct group name (reusing it if it already
    exists) and moves the files from the `src` folder to its respective group folder. Files whose destination
    already exists are left in `src` and reported on stderr instead of overwriting the existing file.

    Prints a message to stderr and exits the process with status 1 if `src` is not a directory or if `target`
    exists but is not a directory.

    :param src: src folder
    :param target: target folder; created (including missing parent folders) if it does not exist
    :param cut_seq_name: if set, the group name will be cut from the filenames in the target folders
    """
    # isdir() is already False for non-existing paths, so no separate existence check is needed
    if not os.path.isdir(src):
        print(f"{src} not a directory", file=sys.stderr)
        sys.exit(1)

    seqs = group_file_names(src)

    if not os.path.exists(target):
        os.makedirs(target)
    if not os.path.isdir(target):
        print(f"{target} is not a directory", file=sys.stderr)
        sys.exit(1)

    for seq_name, files in seqs.items():
        seq_folder = os.path.join(target, seq_name)
        # exist_ok: a group folder left over from an earlier run must not abort the move halfway through
        os.makedirs(seq_folder, exist_ok=True)
        for seq_nr, ext in files:
            source_file_name = f"{seq_name}_{seq_nr}.{ext}"
            target_file_name = f"{seq_nr}.{ext}" if cut_seq_name else source_file_name
            destination = os.path.join(seq_folder, target_file_name)
            # group folders may now be reused, so guard against replacing a previously moved file
            if os.path.exists(destination):
                print(f"WARNING: not moving {source_file_name} since {destination} already exists",
                      file=sys.stderr)
                continue
            shutil.move(os.path.join(src, source_file_name), destination)
def main():
    """
    Command line entry point: reads the `src` and `target` folders plus the optional `-c/--cut-seq-name` switch
    from the command line and hands them to `group_mv`.
    """
    description = ("groups files in the `src` folder by their group name and moves all files of "
                   "one group to a new sub folder in `target`")
    src_help = "source folder containing files of pattern '{group-name}_{5-digit-sequence-nr}.{ext}'"
    cut_help = "only keep the sequence number instead of the full image name"

    cli = ArgumentParser(description=description)
    cli.add_argument("src", help=src_help)
    cli.add_argument("target", help="target folder path")
    cli.add_argument("-c", "--cut-seq-name", action="store_true", help=cut_help)

    options = cli.parse_args()
    group_mv(options.src, options.target, options.cut_seq_name)
# Run the command line interface only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment