Last active
November 15, 2018 19:00
-
-
Save eddieantonio/b06a21d8a229aa996cbd43b5b5363c0d to your computer and use it in GitHub Desktop.
Ideas on how to represent Cree morphological data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: UTF-8 -*- | |
""" | |
Some ideas about how to organize data from the FST and how to store Cree
wordforms.
Usage: | |
Analyzing a wordform descriptively yields "raw" FST output: | |
>>> analysis = next(lookup('kiwapamin')) | |
>>> type(analysis) | |
FSTOutput | |
>>> analysis.input
'kiwapamin'
>>> analysis.output | |
('w', 'â', 'p', 'a', 'm', 'ê', 'w', '+V', '+TA', '+Ind', '+Prs', | |
'+2Sg', '+1SgO') | |
>>> analysis.weight | |
0.0 | |
There should be a function that parses the FSTOutput and creates a Cree | |
wordform, which can then be queried for linguistic information: | |
>>> kiwâpamin = to_wordform(analysis) | |
>>> type(kiwâpamin) | |
CreeWordform | |
>>> kiwâpamin.prelemma_tags | |
() | |
>>> kiwâpamin.lemma | |
'wâpamêw' | |
>>> kiwâpamin.postlemma_tags | |
(<Tag pos='V'>, <Tag animacy='TA'>, <Tag indicative='Ind'>, <Tag tense='Prs'>, | |
<Tag actor='2Sg'>, <Tag goal='1Sg'>) | |
>>> kiwâpamin.tags['pos']
<Tag pos='V'>
>>> kiwâpamin.tags['pos'] == 'V'
True
>>> kiwâpamin.tags['actor']
<Tag actor='2Sg'>
>>> kiwâpamin.tags['goal']
<Tag goal='1Sg'>
>>> kiwâpamin.to_sro() | |
'kiwâpamin' | |
>>> kiwâpamin.to_syllabics() | |
'ᑭᐚᐸᒥᐣ' | |
FST inputs that are not accepted will still yield an analysis, but it is
marked as not accepted and has an infinite weight:
>>> analysis = next(lookup('fhqwhgads')) | |
>>> analysis.accepted | |
False | |
>>> analysis.output | |
('f', 'h', 'q', 'w', 'h', 'g', 'a', 'd', 's', '+?') | |
>>> analysis.weight | |
inf | |
""" | |
from dataclasses import dataclass | |
from typing import Sequence, Mapping | |
@dataclass
class FSTOutput:
    """
    One transduction produced by the FST for a given input string.
    """

    # The input string.
    input: str
    # One possible transduction, as a sequence of strings.
    output: Sequence[str]
    # The weight of the transduction.
    weight: float

    @property
    def accepted(self) -> bool:
        """
        Returns whether the transduction succeeded.

        A transduction counts as accepted when its weight is strictly less
        than positive infinity.
        """
        from math import inf

        # Must read the instance attribute: a bare ``weight`` is an unbound
        # name here and would raise NameError on every access.
        return self.weight < inf
@dataclass
class Tag:
    """Placeholder for a single tag; no fields are defined yet."""
@dataclass
class CreeWordform:
    """
    A Cree wordform, stored as a lemma together with its tags.
    """

    # Tags listed before the lemma (presumably in analysis order; confirm
    # against whatever builds this object).
    prelemma_tags: Sequence[Tag]
    # The lemma itself, as a plain string.
    lemma: str
    # Tags listed after the lemma.
    postlemma_tags: Sequence[Tag]
    # Tags addressable by a string key.
    tags: Mapping[str, Tag]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment