Last active
August 29, 2015 14:01
-
-
Save Japanuspus/b0cfc304e2c569eb5524 to your computer and use it in GitHub Desktop.
A python script for preparing files for upload to the Lectio system by adding student numbers to file names based on a reference folder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2.7 | |
""" | |
Lectiolize: A python script for preparing files for upload to the Lectio system | |
by adding student numbers to file names based on a reference folder. | |
(C) janus@insignificancegalore.net, 2014 | |
""" | |
import collections
import itertools
import os, os.path
import re
import sys
import unicodedata
from collections import namedtuple
# Pattern for the start of a reference file name: "<number>_<name>_".
# Group 1 is the run of digits, group 2 the name (any characters except an
# underscore). The underscore after the name is required, so a file name
# without a third part does not match.
_lectiolize_lectioname = re.compile(r'(\d+)_([^_]+)_')
def normalize(s):
    """
    Reduce a name to plain ASCII text so that names can be compared.

    The input is composed with Unicode NFKC normalization, after which every
    character that is still outside ASCII is dropped (an accented letter is
    removed, not replaced by its base letter).

    s: the text to normalize. A byte string is decoded as UTF-8 first, with
       undecodable bytes ignored.
    Returns the remaining ASCII characters as the native ``str`` type, on
    both Python 2 and Python 3.
    """
    if isinstance(s, bytes):
        # unicodedata.normalize only accepts text; decode byte strings
        # instead of failing with a TypeError.
        s = s.decode('utf-8', 'ignore')
    ascii_only = unicodedata.normalize('NFKC', s).encode('ascii', 'ignore')
    if isinstance(ascii_only, str):
        # Python 2: the encoded byte string already is the native str type.
        return ascii_only
    # Python 3: return text so callers can split and compare it against str.
    return ascii_only.decode('ascii')
def partmatch(ref, s):
    """
    Tell whether the name part ``s`` stands for the reference part ``ref``.

    A one-character ``s`` is treated as an initial and matches any ``ref``
    that begins with it; any other ``s`` must be equal to ``ref``.
    The comparison is case-sensitive.
    """
    if ref == s:
        return True
    if len(s) != 1:
        return False
    # s is a single character: accept it as an initial of ref.
    return ref.startswith(s)
class NameAndNumber(object):
    """
    A reference name paired with the number that belongs to it.

    name: the full reference name, its parts separated by whitespace.
    number: the number taken from the reference file name.
    """

    def __init__(self, name, number):
        self.name = name
        self.number = number

    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.number)

    def matches(self, name):
        """
        Decide whether ``name`` refers to the same person as ``self.name``.

        This is the case if the parts of ``name`` match a subset of the parts
        of ``self.name``, in the order they appear. Two parts match if the
        part of ``name`` is a matching initial or the full part (see
        ``partmatch``). Both names are passed through ``normalize`` first.

        Parts are separated by any run of whitespace, so doubled, leading or
        trailing spaces do not prevent a match. A ``name`` with no parts left
        after normalization never matches.

        Returns True or False.
        """
        # One shared iterator: every successful match consumes the reference
        # parts up to and including the matched one, which enforces ordering.
        ref = iter(normalize(self.name).split())
        tgt = normalize(name).split()
        if not tgt:
            # all() over nothing would be True and match every reference.
            return False
        return all(any(partmatch(r, s) for r in ref) for s in tgt)
def get_refs_folder(folder):
    """
    Collect the name/number pairs described by the files in a folder.

    Every entry of ``folder`` whose name starts with ``<number>_<name>_`` (the
    form matched by ``_lectiolize_lectioname``) contributes a NameAndNumber;
    all other entries are ignored. A number/name pair that occurs in several
    file names is returned only once, so that duplicates in the reference
    folder do not make a later match look ambiguous.

    folder: path of the reference folder.
    Returns a list of NameAndNumber objects, in the order first encountered.
    Raises ValueError if ``folder`` is None, and whatever os.listdir raises
    if the folder cannot be read.
    """
    if folder is None:
        raise ValueError('a reference folder is required')
    if isinstance(folder, bytes):
        # Python 2 hands over command-line arguments as byte strings. Decode
        # with the file system encoding rather than ASCII so that non-ASCII
        # folder names work and os.listdir returns text file names.
        folder = folder.decode(sys.getfilesystemencoding() or 'utf-8')

    refs = []
    seen = set()
    for filename in os.listdir(folder):
        m = _lectiolize_lectioname.match(filename)
        if m is None:
            continue
        key = (m.group(1), m.group(2))
        if key in seen:
            continue
        seen.add(key)
        refs.append(NameAndNumber(m.group(2), m.group(1)))
    return refs
def rename_matches(target_folder, references, title = None, dryrun = False):
    """
    Rename each file in target_folder that matches exactly one
    reference which is not matched by any other file in the target folder.

    A file name is compared without its extension, using ``ref.matches``.
    The new name is ``<number>_<old name>`` or, when ``title`` is given,
    ``<number>_<old name without extension>_<title><extension>``.

    One line is printed per reference: the rename that is performed, or the
    reason why nothing is renamed (no matching file, several matching files,
    or a file that also matches another reference).

    target_folder: folder whose files are renamed in place.
    references: objects with a ``number`` attribute and a ``matches(name)``
                method, such as NameAndNumber.
    title: optional text to insert before the extension.
    dryrun: if true, only print what would be done.
    Returns None.
    """
    # Python 2 hands over command-line arguments as byte strings. Decode them
    # with the file system encoding rather than ASCII so that non-ASCII folder
    # names and titles can be combined with the text file names.
    fs_encoding = sys.getfilesystemencoding() or 'utf-8'
    if isinstance(target_folder, bytes):
        target_folder = target_folder.decode(fs_encoding)
    if isinstance(title, bytes):
        title = title.decode(fs_encoding)

    filenames = os.listdir(target_folder)

    # Keep one entry per reference, including references without any match,
    # so that those can be reported too.
    candidates = []
    hits_per_file = collections.Counter()
    for ref in references:
        matched = [f for f in filenames if ref.matches(os.path.splitext(f)[0])]
        candidates.append((ref, matched))
        hits_per_file.update(matched)

    for ref, matched in candidates:
        if not matched:
            print('No match for %s'%ref.number)
            continue
        if len(matched) > 1:
            print('Multiple matches for %s: {%s}'%(ref.number, ', '.join(matched)))
            continue
        filename = matched[0]
        if hits_per_file[filename] > 1:
            # The file fits more than one reference, so it cannot be assigned.
            print('%s matched %s but also others'%(filename, ref.number))
            continue
        # all is good
        if title is None:
            newname = '%s_%s'%(ref.number, filename)
        else:
            stem, extension = os.path.splitext(filename)
            newname = '%s_%s_%s%s'%(ref.number, stem, title, extension)
        print('Renaming %20s to %s'%(filename, newname))
        if not dryrun:
            os.rename(os.path.join(target_folder, filename), os.path.join(target_folder, newname))
def main():
    """
    Command-line entry point.

    Parses the arguments, reads the references from the reference folder and
    renames the matching files in the target folder. The reference folder is
    mandatory: without it there is nothing to take the numbers from, so a
    missing option is reported by argparse as a usage error.
    """
    import argparse
    parser = argparse.ArgumentParser(description="""
Rename a set of files to lectio format nr_name_title.ext based on
a reference folder or list of names and numbers
The files to rename should be named
<First name> [initial_or_surname ]+.<extension>
Example use:
lectiolize.py --title 'Crazy TLA' -r folder_from_lectio 'my folder'
""", formatter_class = argparse.RawTextHelpFormatter)
    parser.add_argument('--reference_folder', '-r', required = True,
        help = 'A folder to scan for files with correct numbers')
    parser.add_argument('--title', '-t', default = None,
        help = 'Title to add to end of renamed files')
    parser.add_argument('--dryrun', '-n', action = 'store_true', default = False,
        help = 'Only show what would be done')
    parser.add_argument('target_folder',
        help = 'Folder in which to rename files')
    args = parser.parse_args()
    refs = get_refs_folder(args.reference_folder)
    rename_matches(args.target_folder, refs, args.title, dryrun = args.dryrun)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment