Skip to content

Instantly share code, notes, and snippets.

@Japanuspus
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Japanuspus/b0cfc304e2c569eb5524 to your computer and use it in GitHub Desktop.
Save Japanuspus/b0cfc304e2c569eb5524 to your computer and use it in GitHub Desktop.
A Python script for preparing files for upload to the Lectio system by adding student numbers to file names based on a reference folder.
#!/usr/bin/env python2.7
"""
Lectiolize: A python script for preparing files for upload to the Lectio system
by adding student numbers to file names based on a reference folder.
(C) janus@insignificancegalore.net, 2014
"""
from collections import namedtuple
import re
import os, os.path
import unicodedata
import itertools
import collections
# Lectio file names look like <number>_<name>_...: group 1 is the number,
# group 2 the name (any run of characters without an underscore). The
# underscore after the name is required for a match.
# Raw string so that \d reaches the regex engine instead of being treated
# as an (invalid) string escape.
_lectiolize_lectioname = re.compile(r'(\d+)_([^_]+)_')
def normalize(s):
    """
    Reduce a unicode string to its ASCII-only byte string.

    The text is first brought to NFKC form, so that differently encoded
    versions of the same accented character become identical, and every
    character that is still outside ASCII is then silently dropped.
    """
    composed = unicodedata.normalize('NFKC', s)
    return composed.encode('ascii', 'ignore')
def partmatch(ref, s):
    """
    Tell whether the name part s stands for the reference part ref.

    That is the case when both are equal, or when s is a single
    character that ref starts with (i.e. s is an initial of ref).
    """
    if ref == s:
        return True
    return len(s) == 1 and ref.startswith(s)
class NameAndNumber:
    """
    A name together with the number that belongs to it.
    """
    def __init__(self, name, number):
        self.name = name
        self.number = number

    def matches(self, name):
        """
        Decide whether name matches self.name.

        This is the case if the parts of name match a subset of the
        parts of self.name, in the order they appear.
        Parts match if they are a matching initial or the full name.
        Both names are normalized and split on single spaces first.
        """
        ref = iter(normalize(self.name).split(' '))
        tgt = normalize(name).split(' ')
        # ref is one shared iterator: any() stops consuming it at the first
        # hit, so the next target part can only match a later reference
        # part, which enforces the ordering. any() also yields a real
        # boolean; testing the matched part itself would treat a matched
        # empty part as "no match".
        return all(any(partmatch(r, s) for r in ref) for s in tgt)
def get_refs_folder(folder):
    """
    Build the list of NameAndNumber objects described by the files in folder.

    Every directory entry whose name starts with <number>_<name>_ (as
    recognised by the module-level Lectio pattern) contributes one object;
    all other entries are skipped.
    """
    refs = []
    # listing a unicode path makes os.listdir return unicode names
    for entry in os.listdir(unicode(folder)):
        hit = _lectiolize_lectioname.match(entry)
        if hit is None:
            continue
        refs.append(NameAndNumber(hit.group(2), hit.group(1)))
    return refs
def rename_matches(target_folder, references, title = None, dryrun = False):
    """
    Rename each file in target_folder that matches exactly one
    reference which is not matched by any other file in the target folder.

    target_folder -- folder whose files are candidates for renaming
    references    -- hashable objects offering matches(name) and a number
                     attribute (e.g. NameAndNumber instances)
    title         -- optional title; when given the new name is
                     <number>_<stem>_<title><ext>, otherwise
                     <number>_<original file name>
    dryrun        -- when true, only print what would be renamed

    One line is printed per reference: the rename, or the reason why
    nothing was renamed (no match, several files, or an ambiguous file).
    """
    # convert once so that listing and joining both work on unicode paths
    target_folder = unicode(target_folder)
    assigned = collections.defaultdict(list)
    ambiguous = set()
    for f in os.listdir(target_folder):
        stem = os.path.splitext(f)[0]
        matching = [ref for ref in references if ref.matches(stem)]
        for ref in matching:
            assigned[ref].append(f)
        if len(matching) > 1:
            # a file that fits several references cannot be attributed
            ambiguous.add(f)
    # Walk the references, not the assignment table: the table only holds
    # references that received a file, so unmatched ones would go unreported.
    for ref in references:
        matched = assigned.get(ref, [])
        if not matched:
            print('No match for %s' % ref.number)
            continue
        if len(matched) > 1:
            print('Multiple matches for %s: {%s}' % (ref.number, ', '.join(matched)))
            continue
        filename = matched[0]
        if filename in ambiguous:
            print('%s matched %s but also others' % (filename, ref.number))
            continue
        # all is good
        if title is None:
            newname = '%s_%s' % (ref.number, filename)
        else:
            stem, ext = os.path.splitext(filename)
            newname = '%s_%s_%s%s' % (ref.number, stem, title, ext)
        print('Renaming %20s to %s' % (filename, newname))
        if not dryrun:
            os.rename(os.path.join(target_folder, filename),
                      os.path.join(target_folder, newname))
def main():
    """
    Command line entry point.

    Parses the arguments, collects the references from the reference
    folder and renames (or, with --dryrun, only reports) the files in the
    target folder.
    """
    import argparse
    parser = argparse.ArgumentParser(description="""
Rename a set of files to lectio format nr_name_title.ext based on
a reference folder or list of names and numbers
The files to rename should be named
<First name> [initial_or_surname ]+.<extension>
Example use:
lectiolize.py --title 'Crazy TLA' -r folder_from_lectio 'my folder'
""", formatter_class = argparse.RawTextHelpFormatter)
    # The reference folder is always scanned below, so let argparse reject
    # a missing -r with a usage message instead of failing later on a
    # folder literally named "None".
    parser.add_argument('--reference_folder', '-r', required = True,
        help = 'A folder to scan for files with correct numbers')
    parser.add_argument('--title', '-t', default = None,
        help = 'Title to add to end of renamed files')
    parser.add_argument('--dryrun', '-n', action = 'store_true', default = False,
        help = 'Only show what would be done')
    parser.add_argument('target_folder',
        help = 'Folder in which to rename files')
    args = parser.parse_args()
    refs = get_refs_folder(args.reference_folder)
    rename_matches(args.target_folder, refs, args.title, dryrun = args.dryrun)
# Run the command line interface only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment