Created
September 24, 2023 16:35
-
-
Save robintux/731a8b143ffc0b98a969f8c980e3825e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ordered list of (regular expression, part-of-speech tag) pairs.
# NOTE(review): presumably fed to a first-match-wins regexp tagger such as
# nltk.RegexpTagger, which anchors each pattern at the start of the token --
# confirm against the caller. Under that assumption the order below matters:
# specific closed-class words come first, suffix heuristics next, and the
# catch-all noun rule last.
patrones = [
    # irregular forms of 'to be'
    (r"[Aa]m$", "BEM"),
    (r"[Aa]re$", "BER"),
    (r"[Ii]s$", "BEZ"),
    (r"[Ww]as$", "BEDZ"),
    (r"[Ww]ere$", "BED"),
    (r"[Bb]een$", "BEN"),
    # irregular forms of 'to have'
    (r"[Hh]ave$", "HV"),
    (r"[Hh]as$", "HVZ"),
    (r"[Hh]ad$", "HVD"),
    # personal pronouns
    (r"I$", "PRP"),
    (r"[Yy]ou$", "PRP"),
    (r"[Hh]e$", "PRP"),
    (r"[Ss]he$", "PRP"),
    (r"[Ii]t$", "PRP"),
    (r"[Tt]hey$", "PRP"),
    # articles: a / an / the
    (r"[Aa]n?$", "AT"),
    (r"[Tt]he$", "AT"),
    # wh- pronouns (broad: any token starting with 'wh' plus one more char)
    (r"[Ww]h.+$", "WP"),
    # verb suffix heuristics
    (r".*ing$", "VBG"),                 # gerunds
    (r".*ed$", "VBD"),                  # simple past
    (r".*es$", "VBZ"),                  # 3rd singular present
    # modals
    (r"[Cc]an(not|n\'t)?$", "MD"),      # can, cannot, cann't
    (r"[Mm]ight$", "MD"),
    (r"[Mm]ay$", "MD"),
    (r".+ould$", "MD"),                 # could, should, would
    # remaining suffix heuristics
    (r".*ly$", "RB"),                   # adverbs
    (r".*\'s$", "NN$"),                 # possessive nouns
    (r".*s$", "NNS"),                   # plural nouns
    # cardinal numbers: optional sign, digits, optional decimal part.
    # The dot is escaped so that only a literal decimal point is accepted;
    # an unescaped '.' would also accept tokens such as '1a2'.
    (r"-?[0-9]+(\.[0-9]+)?$", "CD"),
    (r"^to$", "TO"),                    # to
    (r"^in$", "IN"),                    # in (preposition)
    (r"^[A-Z]+([a-z])*$", "NNP"),       # proper nouns
    (r".*", "NN"),                      # nouns (default, matches anything)
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment