Skip to content

Instantly share code, notes, and snippets.

@audax
Created November 18, 2013 11:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save audax/7526331 to your computer and use it in GitHub Desktop.
Save audax/7526331 to your computer and use it in GitHub Desktop.
Ordnet alten Prüfungsleistungen der PO2004 die neuen der PO2009 zu. Für die Leibniz Uni Hannover.
from collections import namedtuple, defaultdict
from pprint import pprint
from lxml import html
import csv
# Pairs the record of a new exam (``new``) with the record of the old exam
# (``old``) it is mapped from; both fields hold plain dicts.
Info = namedtuple(typename='Info', field_names='new old')
def parse_mapping(tree):
    """Extract the old-to-new exam mapping from the parsed mapping page.

    Every ``<tr>`` except the first one (presumably the table header) is
    inspected. A row is used only when its first cell contains exactly two
    ``<small>`` elements; any other row is skipped. The first cell describes
    the old exam, the third cell describes the new exam.

    Args:
        tree: Parsed HTML document that offers ``xpath`` and whose elements
            offer ``cssselect`` (e.g. the result of ``lxml.html.parse``).

    Returns:
        A ``defaultdict(list)`` keyed by the old exam number. Each value is
        a list of ``Info(new=..., old=...)`` tuples, where ``old`` has the
        keys ``modul``, ``name`` and ``prfn`` and ``new`` has the keys
        ``kompetenz``, ``modul``, ``name`` and ``prfn``. Because the result
        is a ``defaultdict``, indexing it with an unknown key creates an
        empty list instead of raising ``KeyError``; use ``in`` to test for
        membership.
    """
    mapping = defaultdict(list)
    rows = tree.xpath('//tr')
    for row in rows[1:]:
        try:
            # Unpacking fails unless the cell holds exactly two <small> tags.
            modul_tag, prfn_tag = row[0].cssselect('small')
        except ValueError:
            continue

        old = {}
        old['modul'] = modul_tag.text.rstrip(':')
        old['name'] = row[0][1].tail
        # The text has the form "<label>: <number>"; keep the number.
        old['prfn'] = prfn_tag[0].text.split(': ')[1]

        new = {}
        new_td = row[2]
        # NOTE: rstrip() treats its argument as a set of characters, so this
        # removes every trailing space and hyphen, not the literal " - ".
        new['kompetenz'] = new_td[0].text.rstrip(' - ')
        new['modul'] = new_td[0][0].tail.strip().rstrip(':')
        try:
            # Preferred source of the name: the second <b> element.
            new['name'] = new_td.cssselect('b')[1].text
        except IndexError:
            # Fewer than two <b> elements: fall back to the text that
            # follows the cell's second child element.
            new['name'] = new_td[1].tail
        if new['name'] is None:
            # No usable name for the new exam; ignore this row.
            continue
        new['prfn'] = new_td.cssselect('small > i')[0].text.split(': ')[1]

        mapping[old['prfn']].append(Info(new=new, old=old))
    return mapping
def write_mapping(mapping, filename='mapping.csv', encoding=None):
    """Write the old-to-new exam mapping to a CSV file.

    A header row is written first, followed by one row per entry. Entries
    are ordered by the sorted keys of *mapping*; entries sharing a key keep
    their list order.

    Args:
        mapping: Dict-like object mapping an old exam number to a list of
            entries exposing ``old`` and ``new`` dicts (as produced by
            ``parse_mapping``). ``old`` must provide ``prfn``, ``modul`` and
            ``name``; ``new`` must provide ``prfn``, ``kompetenz`` and
            ``name``.
        filename: Path of the CSV file to create or overwrite.
        encoding: Text encoding of the output file. ``None`` (the default)
            uses the platform default encoding; pass e.g. ``'utf-8'`` when
            that default cannot represent the non-ASCII header text.
    """
    header = ['Alte Prüfungsnummer', 'Altes Modul',
              'Alter Name', 'Prüfungsnummer', 'Kompetenzbereich', 'Name']
    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'w', newline='', encoding=encoding) as out:
        writer = csv.writer(out)
        writer.writerow(header)
        for prfn in sorted(mapping):
            for info in mapping[prfn]:
                old, new = info.old, info.new
                writer.writerow([
                    old['prfn'], old['modul'], old['name'],
                    new['prfn'], new['kompetenz'], new['name'],
                ])
# noten.csv is, in my case, the QIS "Notenspiegel" HTML page, opened with
# LibreOffice and saved as a single CSV file.
# mapping.html comes from http://www.dbs.uni-hannover.de/fbinf/modkat/mapping.php
tree = html.parse('mapping.html')
mapping = parse_mapping(tree)

errors = []                     # CSV rows whose exam number is not in mapping
duplicates = defaultdict(list)  # exam number -> every CSV row that shares it
row_mapping = {}                # exam number -> latest CSV row seen for it
filtered_mapping = {}           # entries of mapping that occur in noten.csv

# newline='' is what the csv module expects for files handed to csv.reader.
with open('noten.csv', newline='') as noten:
    reader = csv.reader(noten, delimiter=',')
    for row in reader:
        # Only rows with more than two columns are considered.
        if len(row) <= 2:
            continue
        # Only the first column (the exam number) is needed.
        prfn = row[0]

        # mapping may be a defaultdict, in which case mapping[prfn] would
        # silently create an empty entry instead of raising KeyError, so
        # unknown exam numbers are detected with an explicit membership test.
        if prfn not in mapping:
            errors.append(row)
            continue

        if prfn in row_mapping:
            seen = duplicates[prfn]
            if not seen:
                # First repetition: also record the earlier row, once.
                seen.append(row_mapping[prfn])
            seen.append(row)

        filtered_mapping[prfn] = mapping[prfn]
        row_mapping[prfn] = row

write_mapping(filtered_mapping, 'result.csv')

print('not found: ')
print(errors)
print('duplicates: ')
for items in duplicates.items():
    pprint(items)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment