Skip to content

Instantly share code, notes, and snippets.

@sebastiangeiger
Created December 22, 2011 15:23
Show Gist options
  • Save sebastiangeiger/1510674 to your computer and use it in GitHub Desktop.
Save sebastiangeiger/1510674 to your computer and use it in GitHub Desktop.
NLTK homework
import locale
import nltk
from nltk.corpus import wordnet as wn
from nltk.tag.simplify import simplify_wsj_tag
def main():
    """Print the number of sense combinations for a fixed sample sentence.

    The sentence is tokenized with ``nltk.word_tokenize`` and two totals are
    printed: one that ignores each token's word type and one that takes it
    into account.
    """
    sentence = "I saw a man who is 98 years old and can still walk and tell jokes."
    tokens = nltk.word_tokenize(sentence)
    # A single parenthesized argument prints the same text under both the
    # Python 2 print statement and the Python 3 print function.
    print(nice_formatting(combinations_without_wordtype(tokens))
          + " possible combinations when not taking wordtype into account")
    print(nice_formatting(combinations_with_wordtype(tokens))
          + " possible combinations when taking wordtype into account")
def combinations_without_wordtype(tokens):
    """Return the product of the WordNet synset counts of all *tokens*.

    Every synset of a token is counted, whatever its word type. A token
    with no synsets contributes a factor of 1 so it never zeroes the total.
    """
    total = 1
    for word in tokens:
        sense_count = len(wn.synsets(word))
        # Counts of 0 or 1 leave the running product unchanged.
        if sense_count > 1:
            total *= sense_count
    return total
def combinations_with_wordtype(tokens):
    """Return the product of per-token synset counts, filtered by word type.

    The tokens are tagged with ``nltk.pos_tag``. When a tag translates to a
    WordNet word class, only synsets of that class are counted; otherwise the
    token contributes a factor of 1. A token with no matching synsets also
    contributes a factor of 1.
    """
    total = 1
    for word, tag in nltk.pos_tag(tokens):
        has_wordnet_class, wordnet_class = translate_to_wordnet_tags(tag)
        if not has_wordnet_class:
            # Untranslatable tags count as exactly one option.
            continue
        sense_count = len(wn.synsets(word, wordnet_class))
        total *= max(sense_count, 1)
    return total
def translate_to_wordnet_tags(parser_tag):
    """Translate a tagger tag into a WordNet word class.

    The tag is first reduced with ``simplify_wsj_tag``. Returns a pair
    ``(translatable, value)``: when the simplified tag has a WordNet
    counterpart, ``translatable`` is True and ``value`` is the matching
    ``wn`` constant. Otherwise ``translatable`` is False and ``value`` is
    ``'.'`` for the ``'.'`` tag, or an "Unknown tag: ..." message naming the
    simplified tag.
    """
    simplified = simplify_wsj_tag(parser_tag)
    wordnet_classes = {
        'N': wn.NOUN,
        # All verb forms, including modals, map onto the single verb class.
        'V': wn.VERB,
        'VD': wn.VERB,
        'VG': wn.VERB,
        'VN': wn.VERB,
        'MOD': wn.VERB,
        'ADJ': wn.ADJ,
        'ADV': wn.ADV,
    }
    if simplified in wordnet_classes:
        return True, wordnet_classes[simplified]
    if simplified == '.':
        return False, '.'
    return False, "Unknown tag: " + simplified
def nice_formatting(integer):
    """Return *integer* as a string with comma thousands separators.

    Uses the ``,`` format specifier rather than the ``locale`` module, so
    the result does not depend on an ``en_US`` locale being installed and
    the process-wide locale is left untouched. The value is truncated to an
    int first, matching the ``%d`` conversion this function used before.

    >>> nice_formatting(1234567)
    '1,234,567'
    """
    return format(int(integer), ",d")
# Run the demonstration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment