# miikargh/evaluate.py

## evaluate.py
import json
from glob import glob
from collections import defaultdict
import copy

import pke
from sklearn.metrics import f1_score, precision_score, recall_score


# Number of top-ranked keyphrases requested from the extractor per document;
# also caps how many reference keyphrases are compared per document.
NUM_KEYPHRASES = 10
# Passed as the `stemming` argument of the extractor's get_n_best().
STEMMING = True

# Glob pattern for the test documents; only matches ending in '.xml' are used.
TEST_DATA = './data/semeval-2010-pre/test/lvl-2/*'
# JSON file mapping document ids to lists of reference keyphrase groups
# (the groups are flattened into one list per document on load).
STANDARD_KPS = './data/semeval-2010-pre/references/test.combined.stem.json'

def flatten(lst):
    '''Collapses a list of lists into one flat list (one level deep).'''
    flat = []
    for inner in lst:
        flat.extend(inner)
    return flat


def make_equal_len(lst_a, lst_b, in_place=False, empty_marker='<EMPTY>'):
    '''Pads the shorter of two lists with a marker so both end up the same length.

        Args:
            lst_a (list(str)): List of strings
            lst_b (list(str)): List of strings
            in_place (bool): Modifies originals if set to True, else works on deep copies
            empty_marker (str): Value appended to the shorter list as padding

        Returns:
            (tuple(list, list)): Tuple consisting of (lengthened) a and b in their original order.
    '''
    if in_place:
        a, b = lst_a, lst_b
    else:
        a = copy.deepcopy(lst_a)
        b = copy.deepcopy(lst_b)

    # Positive when b is longer, negative when a is longer, zero when equal.
    diff = len(b) - len(a)
    if diff > 0:
        a.extend([empty_marker] * diff)
    elif diff < 0:
        b.extend([empty_marker] * -diff)

    return a, b


if __name__ == '__main__':
    # Imported here so this block is self-contained; only used for path handling.
    import os

    # Extract keyphrases with TopicRank
    extracted = {}
    filenames = [path for path in glob(TEST_DATA) if path.endswith('.xml')]

    for fname in filenames:
        # basename() honours the platform's path separator, unlike split('/').
        # The document id is the file name up to its first dot.
        doc_id = os.path.basename(fname).split('.')[0]

        print('Extracting keyphrases for document {}'.format(doc_id))

        extractor = pke.unsupervised.TopicRank(input_file=fname)
        extractor.read_document(format='corenlp')
        extractor.candidate_selection()
        extractor.candidate_weighting()

        n_best = extractor.get_n_best(n=NUM_KEYPHRASES, stemming=STEMMING)
        # Keep only the first element of each returned entry
        # (presumably the phrase of a (phrase, score) pair -- see pke docs).
        extracted[doc_id] = [entry[0] for entry in n_best]

    # Load standard keyphrases; each document's groups are flattened to one list
    with open(STANDARD_KPS, 'r') as ref_file:
        standard = {
            doc: flatten(groups) for doc, groups in json.load(ref_file).items()
        }

    # Get list of all keyphrases (sorted so the label order is reproducible)
    standard_kps = sorted(set(flatten(standard.values())))

    # Make two 1-dimensional lists for evaluation
    # NOTE(review): the lists are compared index by index below, so an extracted
    # phrase only counts as a hit when it sits at the same position as the
    # identical reference phrase -- confirm this is the intended metric.
    all_std = []
    all_ext = []
    for doc_id, ext in extracted.items():
        # Raises KeyError if a document has no entry in the reference file.
        std = standard[doc_id]

        std_e, ext_e = make_equal_len(std[:NUM_KEYPHRASES], ext[:NUM_KEYPHRASES])

        all_std += std_e
        all_ext += ext_e

    # Evaluate
    micro_f1 = f1_score(all_std, all_ext, labels=standard_kps, average='micro')
    print('F-score with micro average {}'.format(micro_f1 * 100))

    # Make into binary lists (1 == match, 0 == no match)
    all_std_bin = [1] * len(all_std)
    all_ext_bin = [int(ext_kp == std_kp) for std_kp, ext_kp in zip(all_std, all_ext)]

    f_bin = f1_score(all_std_bin, all_ext_bin)
    print('F-score with binary average {}'.format(f_bin * 100))
	import json
	from glob import glob
	from collections import defaultdict
	import copy

	import pke
	from sklearn.metrics import f1_score, precision_score, recall_score


	# NOTE(review): this tab-indented section looks like a second copy of the
	# script above with its indentation flattened -- confirm whether it should
	# be removed.
	# Number of top-ranked keyphrases requested from the extractor per document;
	# also caps how many reference keyphrases are compared per document.
	NUM_KEYPHRASES = 10
	# Passed as the `stemming` argument of the extractor's get_n_best().
	STEMMING = True

	# Glob pattern for the test documents; only matches ending in '.xml' are used.
	TEST_DATA = './data/semeval-2010-pre/test/lvl-2/*'
	# JSON file mapping document ids to lists of reference keyphrase groups
	# (the groups are flattened into one list per document on load).
	STANDARD_KPS = './data/semeval-2010-pre/references/test.combined.stem.json'

	def flatten(lst):
	    '''Makes a two-dimensional list into a one-dimensional list.'''
	    return [item for sublist in lst for item in sublist]


	def make_equal_len(lst_a, lst_b, in_place=False, empty_marker='<EMPTY>'):
	    '''Pads the shorter of two lists with a marker so both end up the same length.

	        Args:
	            lst_a (list(str)): List of strings
	            lst_b (list(str)): List of strings
	            in_place (bool): Modifies originals if set to True, else works on deep copies
	            empty_marker (str): Value appended to the shorter list as padding

	        Returns:
	            (tuple(list, list)): Tuple consisting of (lengthened) a and b in their original order.
	    '''
	    if in_place:
	        a, b = lst_a, lst_b
	    else:
	        a = copy.deepcopy(lst_a)
	        b = copy.deepcopy(lst_b)

	    # Positive when b is longer, negative when a is longer, zero when equal.
	    diff = len(b) - len(a)
	    if diff > 0:
	        a.extend([empty_marker] * diff)
	    elif diff < 0:
	        b.extend([empty_marker] * -diff)

	    return a, b



	if __name__ == '__main__':
	    # Imported here so this block is self-contained; only used for path handling.
	    import os

	    # Extract keyphrases with TopicRank
	    extracted = {}
	    filenames = [path for path in glob(TEST_DATA) if path.endswith('.xml')]

	    for fname in filenames:
	        # basename() honours the platform's path separator, unlike split('/').
	        # The document id is the file name up to its first dot.
	        doc_id = os.path.basename(fname).split('.')[0]

	        print('Extracting keyphrases for document {}'.format(doc_id))

	        extractor = pke.unsupervised.TopicRank(input_file=fname)
	        extractor.read_document(format='corenlp')
	        extractor.candidate_selection()
	        extractor.candidate_weighting()

	        n_best = extractor.get_n_best(n=NUM_KEYPHRASES, stemming=STEMMING)
	        # Keep only the first element of each returned entry
	        # (presumably the phrase of a (phrase, score) pair -- see pke docs).
	        extracted[doc_id] = [entry[0] for entry in n_best]

	    # Load standard keyphrases; each document's groups are flattened to one list
	    with open(STANDARD_KPS, 'r') as ref_file:
	        standard = {
	            doc: flatten(groups) for doc, groups in json.load(ref_file).items()
	        }

	    # Get list of all keyphrases (sorted so the label order is reproducible)
	    standard_kps = sorted(set(flatten(standard.values())))

	    # Make two 1-dimensional lists for evaluation
	    # NOTE(review): the lists are compared index by index below, so an extracted
	    # phrase only counts as a hit when it sits at the same position as the
	    # identical reference phrase -- confirm this is the intended metric.
	    all_std = []
	    all_ext = []
	    for doc_id, ext in extracted.items():
	        # Raises KeyError if a document has no entry in the reference file.
	        std = standard[doc_id]

	        std_e, ext_e = make_equal_len(std[:NUM_KEYPHRASES], ext[:NUM_KEYPHRASES])

	        all_std += std_e
	        all_ext += ext_e

	    # Evaluate
	    micro_f1 = f1_score(all_std, all_ext, labels=standard_kps, average='micro')
	    print('F-score with micro average {}'.format(micro_f1 * 100))

	    # Make into binary lists (1 == match, 0 == no match)
	    all_std_bin = [1] * len(all_std)
	    all_ext_bin = [int(ext_kp == std_kp) for std_kp, ext_kp in zip(all_std, all_ext)]

	    f_bin = f1_score(all_std_bin, all_ext_bin)
	    print('F-score with binary average {}'.format(f_bin * 100))