Skip to content

Instantly share code, notes, and snippets.

@justinwyer
Created March 3, 2013 21:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save justinwyer/5078281 to your computer and use it in GitHub Desktop.
Save justinwyer/5078281 to your computer and use it in GitHub Desktop.
from psycopg2 import connect
from Levenshtein import *
conn = connect("dbname=education user=Jwyer host=localhost")
id_look_up = {}
candidates = []
cursor = conn.cursor()
cursor.execute("select id, name from school")
for row in cursor.fetchall():
candidates.append(row[1])
id_look_up[row[1]] = row[0]
schools = []
cursor = conn.cursor()
cursor.execute("select distinct(centre_name) from pass_quality")
for row in cursor.fetchall():
schools.append(row[0])
options = {}
gpsless = []
for school in schools:
for candidate in candidates:
if " P " in candidate or " Sp " in candidate or " Js " in candidate or " Jp " in candidate or "Junior" in candidate or "Primary" in candidate:
continue
test_school = school.replace('High', '')
test_school = test_school.replace('School', '')
test_school = test_school.replace('Secondary', '')
test_school = test_school.replace('Senior', '')
test_school = test_school.replace('Primary', '')
test_school = test_school.replace('Junior', '')
test_school = test_school.replace('Compehensive', '')
test_school = test_school.replace(' Comp', '')
test_school = test_school.replace(' Sp ', '')
test_school = test_school.replace(' Ss ', '')
test_school = test_school.replace(' Js ', '')
test_school = test_school.replace('Tech', '')
test_school = test_school.replace(' Ths', '')
test_school = test_school.replace('Boys', '')
test_school = test_school.replace('Girls', '')
test_school = test_school.replace('College', '')
test_school = test_school.replace('Institution', '')
test_school = test_school.replace('Academy', '')
test_school = test_school.replace(' S ', '')
test_school = test_school.replace(' ', ' ')
test_school = test_school.strip()
test_candidate = candidate.replace('High', '')
test_candidate = test_candidate.replace('School', '')
test_candidate = test_candidate.replace('Secondary', '')
test_candidate = test_candidate.replace('Senior', '')
test_candidate = test_candidate.replace('Primary', '')
test_candidate = test_candidate.replace('Junior', '')
test_candidate = test_candidate.replace('Compehensive', '')
test_candidate = test_candidate.replace(' Comp', '')
test_candidate = test_candidate.replace(' Sp ', '')
test_candidate = test_candidate.replace(' Ss ', '')
test_candidate = test_candidate.replace(' Js ', '')
test_candidate = test_candidate.replace('Tech', '')
test_candidate = test_candidate.replace(' Ths', '')
test_candidate = test_candidate.replace('Boys', '')
test_candidate = test_candidate.replace('Girls', '')
test_candidate = test_candidate.replace('College', '')
test_candidate = test_candidate.replace('Institution', '')
test_candidate = test_candidate.replace('Academy', '')
test_candidate = test_candidate.replace(' S ', '')
test_candidate = test_candidate.replace(' ', ' ')
test_candidate = test_candidate.strip()
diff = distance(test_school, test_candidate)
if diff < 3:
if not options.has_key(school):
options[school] = {}
if not options[school].has_key(diff):
options[school][diff] = []
options[school][diff].append(candidate)
if not options.has_key(school):
gpsless.append(school)
for key in options.keys():
#print " - %s" % key
closest = min(options[key].keys())
# for name in options[key][closest]:
#print " = %ld = %s" % (closest, name)
centre_name = key.replace("'", "''")
print "update pass_quality set school_id = %ld where centre_name = '%s';" % (id_look_up[options[key][closest][0]], centre_name)
#for school in gpsless:
#print " ? %s" % school
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment