Skip to content

Instantly share code, notes, and snippets.

@antlauzon
Created May 28, 2018 12:23
Show Gist options
  • Save antlauzon/b570c2a63e764ce0757612de13fd9168 to your computer and use it in GitHub Desktop.
Save antlauzon/b570c2a63e764ce0757612de13fd9168 to your computer and use it in GitHub Desktop.
Thee, Aggrandizer
import nltk
import nltk.data
import string
import sys
from nltk.corpus import wordnet
def wordnet_pos(treebank_tag):
    """Translate a Treebank-style POS tag into a WordNet POS constant.

    Only the leading letter of the tag is inspected: ``J`` selects the
    adjective constant, ``V`` the verb, ``N`` the noun and ``R`` the adverb.

    Args:
        treebank_tag: Part-of-speech tag string, e.g. as produced by
            ``nltk.pos_tag``.

    Returns:
        The matching ``wordnet`` POS constant, or an empty string when the
        tag starts with none of the recognised letters.
    """
    # (tag prefix, name of the attribute on ``wordnet``); the attribute is
    # looked up only for the prefix that actually matches.
    prefix_to_attr = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, attr_name in prefix_to_attr:
        if treebank_tag.startswith(prefix):
            return getattr(wordnet, attr_name)
    return ''
def aggrandize(text):
    """Rewrite *text*, swapping words for WordNet synonyms.

    The text is split into sentences and tokens with NLTK and each token is
    part-of-speech tagged. Every token longer than one character is replaced
    by the lexicographically greatest string among itself and the lemma
    names of its WordNet synsets for that part of speech. The first token of
    each sentence is capitalized, and all tokens are joined back into one
    string.

    Args:
        text: The text to rewrite.

    Returns:
        The rewritten text, or an empty string when *text* is empty or
        contains only whitespace.
    """
    # Nothing to rewrite; return before NLTK has to load any models.
    if not text.strip():
        return ""

    aggrandised_content = []
    for sentence in nltk.sent_tokenize(text):
        tagged_tokens = nltk.pos_tag(nltk.word_tokenize(sentence))
        for position, (word, pos) in enumerate(tagged_tokens):
            if len(word) > 1:
                original = word
                for synset in wordnet.synsets(word, pos=wordnet_pos(pos)):
                    # NOTE(review): this keeps the alphabetically last
                    # candidate (plain string comparison), not the longest
                    # one -- confirm that is the intended notion of
                    # "aggrandizing".
                    word = max([word, *synset.lemma_names()])
                if word != original:
                    # WordNet joins multi-word lemmas with underscores;
                    # turn them back into spaces for the output prose.
                    word = word.replace("_", " ")
            if position == 0:
                word = word.capitalize()
            aggrandised_content.append(word)

    pieces = []
    for token in aggrandised_content:
        # Tokens starting with an apostrophe (e.g. "'s") and punctuation
        # attach directly to the preceding token; everything else is
        # separated by a single space.
        attaches = token.startswith("'") or token in string.punctuation
        pieces.append(token if attaches else " " + token)
    return "".join(pieces).strip()
if __name__ == '__main__':
    # Probe the NLTK resources the script relies on (sentence tokenizer,
    # POS tagger, WordNet) and download them if any is missing.
    try:
        nltk.data.load('tokenizers/punkt/english.pickle')
        # pos_tag expects a list of tokens; a bare string would be tagged
        # one character at a time.
        word_pos = nltk.pos_tag('foo bar quux xyzzy'.split())
        word, pos = word_pos[0]
        wordnet.synsets(word, pos=wordnet_pos(pos))
    except LookupError:
        # NLTK signals a missing resource with LookupError; any other
        # failure is a real error and should surface.
        print("downloading nltk requirements...")
        nltk.download('punkt')
        nltk.download('averaged_perceptron_tagger')
        nltk.download('wordnet')
    # Treat all command-line arguments as one text to rewrite.
    print(aggrandize(' '.join(sys.argv[1:])))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment