Last active
March 15, 2024 13:16
-
-
Save ftnext/300b2cb59d9a3effa0aa78faa442b4eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rouge_score.rouge_scorer import RougeScorer | |
from rouge_score.tokenize import SPACES_RE | |
from rouge_score.tokenizers import Tokenizer | |
class NonAlphaNumericSupportTokenizer(Tokenizer):
    """Whitespace tokenizer that keeps non-alphanumeric tokens.

    The text is lower-cased and split on ``SPACES_RE``; no characters are
    filtered out of the resulting tokens, so text that is already separated
    by spaces (for example pre-segmented Japanese) is kept as-is.

    >>> NonAlphaNumericSupportTokenizer().tokenize("いぬ ねこ")
    ['いぬ', 'ねこ']
    >>> NonAlphaNumericSupportTokenizer().tokenize(" いぬ ねこ ")
    ['いぬ', 'ねこ']
    >>> NonAlphaNumericSupportTokenizer().tokenize("")
    []
    """

    def tokenize(self, text):
        """Return the lower-cased tokens of *text*, split on ``SPACES_RE``.

        Args:
            text: The string to tokenize.

        Returns:
            A list of non-empty token strings; an empty list when *text*
            is empty or consists only of separators.
        """
        # A regex split yields '' at the edges when the text starts or ends
        # with a separator (and [''] for empty text). Drop those so that
        # empty strings are never treated as tokens.
        return [token for token in SPACES_RE.split(text.lower()) if token]
if __name__ == "__main__":
    # Demo: score a space-separated Japanese sentence against itself using
    # the custom tokenizer instead of the scorer's default one.
    rouge_types = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
    custom_tokenizer = NonAlphaNumericSupportTokenizer()
    scorer = RougeScorer(rouge_types, tokenizer=custom_tokenizer)

    reference = "いぬ ねこ"
    candidate = "いぬ ねこ"
    scores = scorer.score(reference, candidate)
    print(scores)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment