Skip to content

Instantly share code, notes, and snippets.

@Zwackelmann
Last active December 7, 2020 09:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Zwackelmann/97c5f4797c7db83e54fbf21d799a7f30 to your computer and use it in GitHub Desktop.
Save Zwackelmann/97c5f4797c7db83e54fbf21d799a7f30 to your computer and use it in GitHub Desktop.
import re
import os
from typing import Dict, Tuple, List
import sys
import shutil
from argparse import ArgumentParser
def group_file_names(d: str) -> Dict[str, List[Tuple[str, str]]]:
    """
    Groups the files in a given directory by their group names. It is assumed that the files match the pattern
    '{group-name}_{5-digit-sequence-nr}.{ext}'. Entries not matching the pattern, as well as matching entries that
    are not regular files (e.g. sub directories), are ignored and reported with a warning on stderr.

    :param d: path to directory
    :return: dict containing a key for each distinct group name. Each value is a list of
        (5-digit-sequence-nr, ext)-pairs, sorted by the numeric sequence number (ties are broken by the extension,
        so the result does not depend on the order in which the directory is listed).
    """
    # The whole name has to match, so that f"{group}_{nr}.{ext}" always reconstructs the original file name.
    pattern = re.compile(r"(.*)_(\d{5})\.(.*)")
    seqs: Dict[str, List[Tuple[str, str]]] = {}
    for file in os.listdir(d):
        match = pattern.fullmatch(file)
        if not match:
            print(f"WARNING: ignoring file {file} since it does not comply to the pattern", file=sys.stderr)
            continue
        # os.listdir also yields sub directories; only regular files belong to a sequence
        if not os.path.isfile(os.path.join(d, file)):
            print(f"WARNING: ignoring {file} since it is not a regular file", file=sys.stderr)
            continue
        seq_name, seq_nr, ext = match.groups()
        seqs.setdefault(seq_name, []).append((seq_nr, ext))
    # sort each group by the sequence number
    return {k: sorted(v, key=lambda x: (int(x[0]), x[1])) for k, v in seqs.items()}
def group_mv(src: str, target: str, cut_seq_name=True) -> None:
    """
    Groups files in `src` folder that comply to the pattern '{group-name}_{5-digit-sequence-nr}.{ext}' by their group
    name. Then creates a sub folder in `target` folder for each distinct group name and moves the files from the
    `src` folder to its respective group folder.

    The `target` folder (including missing parent folders) and the group folders are created on demand; folders
    that already exist are reused, so the function can be run again on a partially populated target.

    Exits the process with status 1 if `src` is not a directory or if `target` exists but is not a directory.

    :param src: src folder
    :param target: target folder
    :param cut_seq_name: if set, the group name will be cut from the filenames in the target folders
    """
    if not os.path.isdir(src):
        print(f"{src} not a directory", file=sys.stderr)
        sys.exit(1)

    seqs = group_file_names(src)

    # Check before creating: makedirs(exist_ok=True) would raise if `target` is an existing regular file.
    if os.path.exists(target) and not os.path.isdir(target):
        print(f"{target} is not a directory", file=sys.stderr)
        sys.exit(1)
    os.makedirs(target, exist_ok=True)

    for seq_name, files in seqs.items():
        seq_folder = os.path.join(target, seq_name)
        # exist_ok: an already existing group folder must not abort the run after some files were moved
        os.makedirs(seq_folder, exist_ok=True)
        for seq_nr, ext in files:
            source_file_name = f"{seq_name}_{seq_nr}.{ext}"
            target_file_name = f"{seq_nr}.{ext}" if cut_seq_name else source_file_name
            shutil.move(os.path.join(src, source_file_name),
                        os.path.join(seq_folder, target_file_name))
def main():
    """Command line entry point: parse `src`, `target` and the `-c` flag, then run the grouping move."""
    description = (
        "groups files in the `src` folder by their group name and moves all files of "
        "one group to a new sub folder in `target`"
    )
    cli = ArgumentParser(description=description)
    cli.add_argument(
        "src",
        help="source folder containing files of pattern '{group-name}_{5-digit-sequence-nr}.{ext}'",
    )
    cli.add_argument("target", help="target folder path")
    cli.add_argument(
        "-c",
        "--cut-seq-name",
        action="store_true",
        help="only keep the sequence number instead of the full image name",
    )
    options = cli.parse_args()
    group_mv(options.src, options.target, options.cut_seq_name)
# Run the command line interface only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment