hughsando/score_mines.py

## score_mines.py
# Length of the diagonal of a 25000 x 25000 square; score_mine divides the
# distance between two mines by this value.
MAX_DISTANCE = sqrt(2 * 25000.0 ** 2)

def score_mine_commodity_label(predicted, actual):
    """
    Compare a predicted collection of commodity labels with the actual one.

    Duplicate labels are ignored; only the distinct labels on each side count.

    Parameters:
        predicted, actual - sized iterables of hashable commodity labels

    Returns:
        1.0 when both inputs are empty, 0.0 when exactly one of them is
        empty, otherwise
        0.5 + (right - 0.5 * missed - wrong) / (2 * (right + wrong + missed)),
        which is 1.0 when the two label sets are identical.
    """
    # len() is used rather than truthiness so that inputs without a plain
    # boolean value (e.g. numpy arrays) keep working.
    if len(actual) == 0:
        return 1.0 if len(predicted) == 0 else 0.0
    if len(predicted) == 0:
        return 0.0

    predicted_labels = set(predicted)
    actual_labels = set(actual)

    right = len(predicted_labels & actual_labels)   # predicted and present
    wrong = len(predicted_labels - actual_labels)   # predicted but not present
    missed = len(actual_labels - predicted_labels)  # present but not predicted
    total = right + wrong + missed
    return 0.5 + ((right - 0.5 * missed - wrong) / (2 * total))


def score_mine(mine0, mine1):
    """
    Score one predicted mine against one actual mine.

    Parameters:
        mine0 - predicted mine; the first two entries are its coordinates and
                the third is a ';'-separated string of commodity labels
        mine1 - actual mine, same layout

    Returns:
        0.75 * commodity label score + 0.25 * distance score
    """
    # Straight-line distance between the two positions, turned into a score
    # that is 1 at zero distance and falls by 1 per MAX_DISTANCE.
    offset = mine0[:2] - mine1[:2]
    distance = np.sqrt(np.square(offset).sum())
    dist_score = 1 - distance / MAX_DISTANCE

    comm_score = score_mine_commodity_label(
        mine0[2].split(";"),
        mine1[2].split(";"),
    )

    return 0.75 * comm_score + 0.25 * dist_score

def score_stamp_matched(pred_xyc, actual_xyc):
    """
    Score predicted mines against actual mines using a greedy pairing.

    Predictions are handled in order; each is paired with the unpaired actual
    mine that gives the highest score_mine value.

    Parameters:
        pred_xyc, actual_xyc - arrays with one mine per row, each row in the
                               layout score_mine expects

    Returns:
        Sum of the paired scores divided by the larger of the two row counts;
        1.0 if both inputs are empty, 0.0 if exactly one of them is empty.
    """
    n_pred = pred_xyc.shape[0]
    n_actual = actual_xyc.shape[0]

    # Nothing predicted and nothing actually there = perfect score
    if max(n_pred, n_actual) == 0:
        return 1.0
    # One side is empty, so nothing can be paired = worst score
    if min(n_pred, n_actual) == 0:
        return 0.0

    taken = [False] * n_actual
    score_sum = 0.0
    for pred in pred_xyc:
        top_score = -1
        top_index = -1
        for index, act in enumerate(actual_xyc):
            if taken[index]:
                continue
            candidate = score_mine(pred, act)
            if candidate > top_score:
                top_score = candidate
                top_index = index
        # NOTE(review): when no unpaired actual mine beats the initial -1
        # (e.g. once every actual mine is already paired), this prediction
        # adds -1 to the sum and index -1, the last actual mine, is marked as
        # paired. Confirm that this penalty is intended.
        score_sum += top_score
        taken[top_index] = True

    return score_sum / max(n_pred, n_actual)


def score_stamp_matched_dataframe(predicted, actual):
    """
    Run score_stamp_matched on two tables of mines.

    Parameters:
        predicted, actual - tables (presumably pandas DataFrames - confirm
                            against callers) with 'x', 'y' and 'commodity'
                            columns

    Returns:
        The value returned by score_stamp_matched for the two tables.
    """
    columns = ['x', 'y', 'commodity']
    return score_stamp_matched(
        pred_xyc=predicted[columns].values,
        actual_xyc=actual[columns].values,
    )
	# Length of the diagonal of a 25000 x 25000 square; score_mine divides the
	# distance between two mines by this value.
	MAX_DISTANCE = sqrt(25000.0 ** 2 + 25000.0 ** 2)

	def score_mine_commodity_label(predicted, actual):
	    """
	    Score the predictions for commodity label against the actual values.

	    Only the distinct labels on each side are counted.

	    Parameters:
	        predicted, actual - sized iterables of hashable commodity labels

	    Returns:
	        1.0 when both inputs are empty, 0.0 when exactly one of them is
	        empty, otherwise
	        0.5 + (right - 0.5 * missed - wrong) / (2 * (right + wrong + missed)).
	    """
	    # Handle zero deposit case
	    if len(actual) == 0:
	        if len(predicted) == 0:
	            return 1.0
	        return 0.0
	    if len(predicted) == 0:
	        return 0.0

	    pred_commodities = set(predicted)
	    actual_commodities = set(actual)

	    # Work out how many right, missed and wrong
	    right = len(pred_commodities & actual_commodities)
	    wrong = len(pred_commodities - actual_commodities)
	    missed = len(actual_commodities - pred_commodities)
	    total = right + wrong + missed
	    return 0.5 + ((right - 0.5 * missed - wrong) / (2 * total))


	def score_mine(mine0, mine1):
	    """
	    Score one predicted mine (mine0) against one actual mine (mine1).

	    Parameters:
	        mine0, mine1 - the first two entries are coordinates and the third
	                       is a ';'-separated string of commodity labels

	    Returns:
	        0.75 * commodity label score + 0.25 * distance score
	    """
	    # 1 at zero distance, falling by 1 per MAX_DISTANCE of separation
	    dist_score = 1 - np.sqrt(np.square(mine0[:2] - mine1[:2]).sum()) / MAX_DISTANCE

	    pred_commodities = mine0[2].split(";")
	    actual_commodities = mine1[2].split(";")

	    comm_score = score_mine_commodity_label(pred_commodities, actual_commodities)

	    return 0.75 * comm_score + 0.25 * dist_score

	def score_stamp_matched(pred_xyc, actual_xyc):
	    """
	    Score predicted mines against actual mines using a greedy pairing.

	    Predictions are handled in order; each is paired with the unused actual
	    mine that gives the highest score_mine value.

	    Parameters:
	        pred_xyc, actual_xyc - arrays with one mine per row, each row in
	                               the layout score_mine expects

	    Returns:
	        Sum of the paired scores divided by the larger of the two row
	        counts; 1.0 if both inputs are empty, 0.0 if exactly one is empty.
	    """
	    total_count = max(pred_xyc.shape[0], actual_xyc.shape[0])
	    # Got 0/0 = perfect score
	    if total_count == 0:
	        return 1.0

	    common_count = min(pred_xyc.shape[0], actual_xyc.shape[0])
	    # nothing can match = worst score
	    if common_count == 0:
	        return 0.0

	    total_score = 0.0
	    actual_used = np.zeros(actual_xyc.shape[0])
	    for pred in pred_xyc:
	        best_score = -1
	        best_id = -1
	        for a_id, act in enumerate(actual_xyc):
	            if not actual_used[a_id]:
	                score = score_mine(pred, act)
	                if score > best_score:
	                    best_score = score
	                    best_id = a_id
	        # NOTE(review): if no unused actual mine beats the initial -1, this
	        # prediction adds -1 and index -1 (the last actual mine) is marked
	        # as used. Confirm that this penalty is intended.
	        total_score += best_score
	        actual_used[best_id] = True

	    return total_score / total_count




	def score_stamp_matched_dataframe(predicted, actual):
	    """
	    Run score_stamp_matched on two tables of mines.

	    Parameters:
	        predicted, actual - tables (presumably pandas DataFrames - confirm
	                            against callers) with 'x', 'y' and 'commodity'
	                            columns
	    """
	    pred_xyc = predicted[['x','y','commodity']].values
	    actual_xyc = actual[['x','y','commodity']].values
	    return score_stamp_matched(pred_xyc=pred_xyc, actual_xyc=actual_xyc)