Created
March 3, 2013 21:03
-
-
Save justinwyer/5078281 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from psycopg2 import connect | |
from Levenshtein import * | |
conn = connect("dbname=education user=Jwyer host=localhost") | |
id_look_up = {} | |
candidates = [] | |
cursor = conn.cursor() | |
cursor.execute("select id, name from school") | |
for row in cursor.fetchall(): | |
candidates.append(row[1]) | |
id_look_up[row[1]] = row[0] | |
schools = [] | |
cursor = conn.cursor() | |
cursor.execute("select distinct(centre_name) from pass_quality") | |
for row in cursor.fetchall(): | |
schools.append(row[0]) | |
options = {} | |
gpsless = [] | |
for school in schools: | |
for candidate in candidates: | |
if " P " in candidate or " Sp " in candidate or " Js " in candidate or " Jp " in candidate or "Junior" in candidate or "Primary" in candidate: | |
continue | |
test_school = school.replace('High', '') | |
test_school = test_school.replace('School', '') | |
test_school = test_school.replace('Secondary', '') | |
test_school = test_school.replace('Senior', '') | |
test_school = test_school.replace('Primary', '') | |
test_school = test_school.replace('Junior', '') | |
test_school = test_school.replace('Compehensive', '') | |
test_school = test_school.replace(' Comp', '') | |
test_school = test_school.replace(' Sp ', '') | |
test_school = test_school.replace(' Ss ', '') | |
test_school = test_school.replace(' Js ', '') | |
test_school = test_school.replace('Tech', '') | |
test_school = test_school.replace(' Ths', '') | |
test_school = test_school.replace('Boys', '') | |
test_school = test_school.replace('Girls', '') | |
test_school = test_school.replace('College', '') | |
test_school = test_school.replace('Institution', '') | |
test_school = test_school.replace('Academy', '') | |
test_school = test_school.replace(' S ', '') | |
test_school = test_school.replace(' ', ' ') | |
test_school = test_school.strip() | |
test_candidate = candidate.replace('High', '') | |
test_candidate = test_candidate.replace('School', '') | |
test_candidate = test_candidate.replace('Secondary', '') | |
test_candidate = test_candidate.replace('Senior', '') | |
test_candidate = test_candidate.replace('Primary', '') | |
test_candidate = test_candidate.replace('Junior', '') | |
test_candidate = test_candidate.replace('Compehensive', '') | |
test_candidate = test_candidate.replace(' Comp', '') | |
test_candidate = test_candidate.replace(' Sp ', '') | |
test_candidate = test_candidate.replace(' Ss ', '') | |
test_candidate = test_candidate.replace(' Js ', '') | |
test_candidate = test_candidate.replace('Tech', '') | |
test_candidate = test_candidate.replace(' Ths', '') | |
test_candidate = test_candidate.replace('Boys', '') | |
test_candidate = test_candidate.replace('Girls', '') | |
test_candidate = test_candidate.replace('College', '') | |
test_candidate = test_candidate.replace('Institution', '') | |
test_candidate = test_candidate.replace('Academy', '') | |
test_candidate = test_candidate.replace(' S ', '') | |
test_candidate = test_candidate.replace(' ', ' ') | |
test_candidate = test_candidate.strip() | |
diff = distance(test_school, test_candidate) | |
if diff < 3: | |
if not options.has_key(school): | |
options[school] = {} | |
if not options[school].has_key(diff): | |
options[school][diff] = [] | |
options[school][diff].append(candidate) | |
if not options.has_key(school): | |
gpsless.append(school) | |
for key in options.keys(): | |
#print " - %s" % key | |
closest = min(options[key].keys()) | |
# for name in options[key][closest]: | |
#print " = %ld = %s" % (closest, name) | |
centre_name = key.replace("'", "''") | |
print "update pass_quality set school_id = %ld where centre_name = '%s';" % (id_look_up[options[key][closest][0]], centre_name) | |
#for school in gpsless: | |
#print " ? %s" % school |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment