Created
November 18, 2013 11:28
-
-
Save audax/7526331 to your computer and use it in GitHub Desktop.
Ordnet alten Prüfungsleistungen der PO2004 die neuen der PO2009 zu. Für die Leibniz Uni Hannover.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple, defaultdict | |
from pprint import pprint | |
from lxml import html | |
import csv | |
# Pairs the parsed "new" entry with the "old" entry it replaces; both fields
# are filled with plain dicts by parse_mapping().
Info = namedtuple('Info', ['new', 'old'])
def parse_mapping(tree):
    """Collect old-to-new exam mappings from the table rows of *tree*.

    Every ``<tr>`` except the first one (presumably the table header) is
    inspected.  The first cell of a row describes the old exam, the third
    cell the new one.  Rows whose first cell does not contain exactly two
    ``<small>`` elements are skipped, as are rows for which no name of the
    new exam can be found via the fallback lookup.

    Args:
        tree: A parsed document offering ``xpath()``; its row/cell elements
            must support indexing and ``cssselect()`` (lxml elements).

    Returns:
        A ``defaultdict(list)`` keyed by the old exam number.  Each value is
        a list of ``Info(new=..., old=...)`` tuples whose fields are dicts:
        ``old`` has the keys ``modul``, ``name`` and ``prfn``; ``new`` has
        ``kompetenz``, ``modul``, ``name`` and ``prfn``.
    """
    result = defaultdict(list)
    table_rows = tree.xpath('//tr')

    for tr in table_rows[1:]:
        try:
            # Unpacking raises ValueError unless there are exactly two
            # <small> elements in the first cell.
            modul_small, prfn_small = tr[0].cssselect('small')
        except ValueError:
            continue

        old_entry = {
            'modul': modul_small.text.rstrip(':'),
            'name': tr[0][1].tail,
            # The text looks like "<label>: <number>"; keep the part after
            # the first ': '.
            'prfn': prfn_small[0].text.split(': ')[1],
        }

        target_cell = tr[2]
        head = target_cell[0]
        # NOTE: rstrip(' - ') strips any trailing spaces and hyphens (it is
        # a character set, not the literal suffix " - ").
        kompetenz = head.text.rstrip(' - ')
        new_modul = head[0].tail.strip().rstrip(':')

        try:
            new_name = target_cell.cssselect('b')[1].text
        except IndexError:
            # No second <b>: fall back to the text following the second
            # child of the cell and give up on the row if there is none.
            new_name = target_cell[1].tail
            if new_name is None:
                continue

        new_prfn = target_cell.cssselect('small > i')[0].text.split(': ')[1]

        new_entry = {
            'kompetenz': kompetenz,
            'modul': new_modul,
            'name': new_name,
            'prfn': new_prfn,
        }
        result[old_entry['prfn']].append(Info(new=new_entry, old=old_entry))

    return result
def write_mapping(mapping, filename='mapping.csv'):
    """Write *mapping* as a CSV file with one line per old/new pair.

    The file starts with a header line and is followed by the entries
    ordered by the sorted keys of *mapping*; entries sharing a key keep
    their list order.

    Args:
        mapping: Dict-like object mapping a key to a list of objects with
            ``old`` and ``new`` attributes.  ``old`` must provide the keys
            ``prfn``, ``modul`` and ``name``; ``new`` must provide ``prfn``,
            ``kompetenz`` and ``name``.
        filename: Path of the CSV file to create or overwrite.
    """
    header = ['Alte Prüfungsnummer', 'Altes Modul',
              'Alter Name', 'Prüfungsnummer', 'Kompetenzbereich', 'Name']

    # newline='' lets the csv module control the line endings itself.
    with open(filename, 'w', newline='') as handle:
        csv_out = csv.writer(handle)
        csv_out.writerow(header)
        for key in sorted(mapping):
            csv_out.writerows(
                [
                    entry.old['prfn'],
                    entry.old['modul'],
                    entry.old['name'],
                    entry.new['prfn'],
                    entry.new['kompetenz'],
                    entry.new['name'],
                ]
                for entry in mapping[key]
            )
# noten.csv is in my case the qis-notespiegel html page, parsed with
# LibreOffice and saved as a single csv.
# newline='' is what the csv module expects for files handed to csv.reader.
with open('noten.csv', newline='') as noten:
    reader = csv.reader(noten, delimiter=',')
    errors = []                     # rows whose exam number is not in mapping
    duplicates = defaultdict(list)  # exam number -> all rows sharing it

    # mapping.html is from http://www.dbs.uni-hannover.de/fbinf/modkat/mapping.php
    tree = html.parse('mapping.html')
    mapping = parse_mapping(tree)

    row_mapping = {}       # exam number -> most recent row carrying it
    filtered_mapping = {}  # subset of mapping that occurs in noten.csv

    for row in reader:
        # Rows with at most two columns are ignored.
        if len(row) <= 2:
            continue

        # Only the first column (exam number) is needed; indexing instead of
        # unpacking six values keeps rows with a different column count from
        # raising ValueError.
        prfn = row[0]

        # mapping is a defaultdict, so mapping[prfn] never raises KeyError
        # and would silently insert an empty list; test membership instead
        # so unknown exam numbers are actually reported.
        if prfn not in mapping:
            errors.append(row)
            continue

        if prfn in filtered_mapping:
            # Record the first occurrence once, then every later one.
            if prfn not in duplicates:
                duplicates[prfn].append(row_mapping[prfn])
            duplicates[prfn].append(row)

        filtered_mapping[prfn] = mapping[prfn]
        row_mapping[prfn] = row

write_mapping(filtered_mapping, 'result.csv')

print('not found: ')
print(errors)
print('duplicates: ')
for items in duplicates.items():
    pprint(items)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment