Skip to content

Instantly share code, notes, and snippets.

@rybesh
Created May 22, 2013 13:22
Show Gist options
  • Save rybesh/5627500 to your computer and use it in GitHub Desktop.
Save rybesh/5627500 to your computer and use it in GitHub Desktop.
Counting matches using `linear_edit_distance`.
from segeval.similarity.distance.SingleBoundaryDistance import linear_edit_distance
from segeval.data.Samples import HEARST_1997_STARGAZER
from itertools import chain, combinations
def compare_boundaries(dataset):
    """Run ``linear_edit_distance`` on every coder pair of every item.

    For each value in ``dataset.values()`` and each unordered pair of
    coders drawn from ``dataset.coders``, the two coders' entries for that
    item are compared. The per-comparison results are then transposed, so
    the return value holds one tuple per component of the result instead of
    one tuple per comparison.

    Args:
        dataset: Mapping-like object exposing ``values()`` and a ``coders``
            attribute; each value must be indexable by coder.

    Returns:
        A tuple of tuples: the i-th inner tuple collects the i-th component
        of every ``linear_edit_distance`` result. If no comparison is made
        (no items, or fewer than two coders) the result is the empty tuple.
    """
    # The coder pairs do not depend on the item, so build them once.
    coder_pairs = list(combinations(dataset.coders, 2))

    # NOTE(review): the literal 2 is passed through unchanged as the third
    # positional argument; presumably the transposition window -- confirm
    # against the segeval documentation.
    results = [
        linear_edit_distance(item[first], item[second], 2)
        for item in dataset.values()
        for first, second in coder_pairs
    ]

    # zip(*rows) transposes rows-per-comparison into columns-per-component.
    return tuple(zip(*results))
# compare_boundaries returns its results transposed; this script expects four
# components per comparison and unpacks one column for each.
distances, transpositions, errors, match_counts = compare_boundaries(
    HEARST_1997_STARGAZER)

# `errors` and `transpositions` hold one iterable per comparison; count the
# elements across all of them without materialising a flattened list.
n_errors = sum(1 for _ in chain.from_iterable(errors))
n_transpositions = sum(1 for _ in chain.from_iterable(transpositions))
n_matches = sum(match_counts)

# A single %-formatted argument prints identically under Python 2 and 3
# ("<count> <label>"), unlike the Python 2-only `print a, b` statement.
print('%s errors' % (n_errors,))
print('%s transpositions' % (n_transpositions,))
print('%s matches' % (n_matches,))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment