tsaylor/matcher.py

## matcher.py
import random, math

# Tuning values read by get_match_pct().  The per-position weight is
#   (position_weighting + 1) - ceil(position * position_weighting / max_pos)
# so position 0 is worth position_weighting + 1 and later positions step down.
position_weighting = 3.0
# Divisor applied to position * position_weighting in the weight formula.
max_pos = 10
# Divides the positional gap before it is turned into a scale factor.
near_miss_scaling = 1.0 # bigger means higher scores


# Module-level totals.  get_match_pct() assigns locals with these same names,
# so it never reads or updates these two values.
total_points = 0
total_match_points = 0
# Scale factors seen so far; get_match_pct() appends each new one here.
sf=[]


def get_match_pct(list1, list2):
    """Score how closely the ordering of list1 agrees with list2, in percent.

    Every position in list1 carries a weight that steps down as the position
    index grows (driven by the module-level ``position_weighting`` and
    ``max_pos``).  For each item the weight is scaled by how far the item sits
    in list2 from where it sits in list1: the same position earns the full
    weight, and the share falls off as ``1 / (gap / near_miss_scaling + 1)``.

    Args:
        list1: Reference sequence; its items and their positions are scored.
        list2: Sequence to compare against.  It must contain every item of
            list1; when an item repeats, its first occurrence is used.

    Returns:
        The earned points, rounded to a whole number, as a percentage of the
        available points.  Returns 0.0 when no points are available (for
        example when list1 is empty) rather than dividing by zero.

    Raises:
        ValueError: If an item of list1 does not occur in list2.

    Side effects:
        Every scale factor not seen before is printed and appended to the
        module-level ``sf`` list.
    """
    total_points = 0
    total_match_points = 0
    for position, answer in enumerate(list1):
        # The weight drops by one each time
        # position * position_weighting / max_pos passes a whole number.
        # NOTE: past max_pos it keeps dropping and can reach zero or below.
        weight = (position_weighting + 1) - math.ceil(
            position * position_weighting / max_pos)
        total_points += weight

        # Scaled gap matching: requires every answer to be in both lists.
        position2 = list2.index(answer)
        # math.fabs keeps the gap a float, so the division below is never
        # an integer division.
        gap = math.fabs(position - position2)
        scale_factor = 1.0 / ((gap / near_miss_scaling) + 1.0)
        if scale_factor not in sf:
            # Report each distinct scale factor once, the first time it occurs.
            print(scale_factor)
            sf.append(scale_factor)
        total_match_points += scale_factor * weight

    if total_points == 0:
        # Nothing could be earned, so there is no meaningful ratio to report.
        return 0.0

    # Earned points are rounded to a whole number before taking the ratio.
    total_match_points = round(total_match_points)
    return total_match_points * 100 / total_points


def genlist(size):
    """Return the integers 1..size in random order.

    Args:
        size: Number of items to generate; 0 or less yields an empty list.

    Returns:
        A new list holding each integer from 1 to size exactly once,
        shuffled with the ``random`` module's shared generator.
    """
    a = list(range(1, size + 1))
    # No reseeding here: the random module seeds itself when imported, and
    # reseeding on every call would discard a seed chosen by the caller.
    random.shuffle(a)
    return a


# Simulation: score 10000 pairs of independently shuffled ten-item lists and
# print summary statistics of the resulting match percentages.
count = 0
rresult = []
for i in range(10000):
    first, second = genlist(10), genlist(10)
    result = get_match_pct(first, second)
    rresult.append(result)

# Summary: number of zero scores, then mean, minimum and maximum score.
print("0 count: " + str(rresult.count(0)))
print("mean: " + str(sum(rresult) / len(rresult)))
print("min: " + str(min(rresult)))
print("max: " + str(max(rresult)))
	import random, math

	# Tuning values read by get_match_pct().  The per-position weight is
	#   (position_weighting + 1) - ceil(position * position_weighting / max_pos)
	# so position 0 is worth position_weighting + 1 and later positions step down.
	position_weighting = 3.0
	# Divisor applied to position * position_weighting in the weight formula.
	max_pos = 10
	# Divides the positional gap before it is turned into a scale factor.
	near_miss_scaling = 1.0 # bigger means higher scores


	# Module-level totals.  get_match_pct() assigns locals with these same names,
	# so it never reads or updates these two values.
	total_points = 0
	total_match_points = 0
	# Scale factors seen so far; get_match_pct() appends each new one here.
	sf=[]


	def get_match_pct(list1, list2):
	    """Score how closely the ordering of list1 agrees with list2, in percent.

	    Every position in list1 carries a weight that steps down as the position
	    index grows (driven by the module-level ``position_weighting`` and
	    ``max_pos``).  For each item the weight is scaled by how far the item
	    sits in list2 from where it sits in list1: the same position earns the
	    full weight, and the share falls off as
	    ``1 / (gap / near_miss_scaling + 1)``.

	    Args:
	        list1: Reference sequence; its items and their positions are scored.
	        list2: Sequence to compare against.  It must contain every item of
	            list1; when an item repeats, its first occurrence is used.

	    Returns:
	        The earned points, rounded to a whole number, as a percentage of
	        the available points.  Returns 0.0 when no points are available
	        (for example when list1 is empty) rather than dividing by zero.

	    Raises:
	        ValueError: If an item of list1 does not occur in list2.

	    Side effects:
	        Every scale factor not seen before is printed and appended to the
	        module-level ``sf`` list.
	    """
	    total_points = 0
	    total_match_points = 0
	    for position, answer in enumerate(list1):
	        # The weight drops by one each time
	        # position * position_weighting / max_pos passes a whole number.
	        # NOTE: past max_pos it keeps dropping and can reach zero or below.
	        weight = (position_weighting + 1) - math.ceil(
	            position * position_weighting / max_pos)
	        total_points += weight

	        # Scaled gap matching: requires every answer to be in both lists.
	        position2 = list2.index(answer)
	        # math.fabs keeps the gap a float, so the division below is never
	        # an integer division.
	        gap = math.fabs(position - position2)
	        scale_factor = 1.0 / ((gap / near_miss_scaling) + 1.0)
	        if scale_factor not in sf:
	            # Report each distinct scale factor once, the first time it occurs.
	            print(scale_factor)
	            sf.append(scale_factor)
	        total_match_points += scale_factor * weight

	    if total_points == 0:
	        # Nothing could be earned, so there is no meaningful ratio to report.
	        return 0.0

	    # Earned points are rounded to a whole number before taking the ratio.
	    total_match_points = round(total_match_points)
	    return total_match_points * 100 / total_points




	def genlist(size):
	    """Return the integers 1..size in random order.

	    Args:
	        size: Number of items to generate; 0 or less yields an empty list.

	    Returns:
	        A new list holding each integer from 1 to size exactly once,
	        shuffled with the ``random`` module's shared generator.
	    """
	    a = list(range(1, size + 1))
	    # No reseeding here: the random module seeds itself when imported, and
	    # reseeding on every call would discard a seed chosen by the caller.
	    random.shuffle(a)
	    return a


	# Simulation: score 10000 pairs of independently shuffled ten-item lists and
	# print summary statistics of the resulting match percentages.
	count = 0
	rresult = []
	for i in range(10000):
	    first, second = genlist(10), genlist(10)
	    result = get_match_pct(first, second)
	    rresult.append(result)

	# Summary: number of zero scores, then mean, minimum and maximum score.
	print("0 count: " + str(rresult.count(0)))
	print("mean: " + str(sum(rresult) / len(rresult)))
	print("min: " + str(min(rresult)))
	print("max: " + str(max(rresult)))