Last active
November 30, 2015 17:26
-
-
Save anna-is-cute/44aba56fe9c75d26dff3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import chain
from urllib.parse import quote

from bs4 import BeautifulSoup
from requests import get
# Key sent as the ``key`` query parameter to the dictionaryapi.com thesaurus
# endpoint. NOTE(review): 'topkek' looks like a placeholder -- supply a real key.
API_KEY = 'topkek'

# Load the list of permitted "simple" words, one word per line.
# Iterating the file and stripping each line copes with both LF and CRLF
# endings, and skipping blank lines avoids a spurious '' entry (which the old
# split('\n') produced whenever the file ended with a newline).
with open('1000.dicin', encoding='utf-8') as f:
    words = [line.strip() for line in f if line.strip()]

# Alternatively, if you don't feel like saving 1000.dicin somewhere
# words = get('http://splasho.com/upgoer5/phpspellcheck/dictionaries/1000.dicin').text.splitlines()
def get_synonyms(word, timeout=10):
    """Fetch synonyms for *word* from the dictionaryapi.com thesaurus endpoint.

    The XML response is parsed with BeautifulSoup; the text of every ``<syn>``
    element is split on ``', '`` and all the pieces are returned as one flat
    list, in document order (duplicates are kept).

    Args:
        word: The word to look up. It is percent-encoded before being placed
            in the URL path, so characters such as ``/``, ``?`` or ``#``
            cannot alter the request.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout the request could block indefinitely.

    Returns:
        A list of strings; empty when the response contains no ``<syn>``
        elements.

    Raises:
        Whatever ``requests.get`` raises on connection failure or timeout.
    """
    url = 'http://www.dictionaryapi.com/api/v1/references/thesaurus/xml/{word}'.format(
        word=quote(word, safe=''))
    # Passing the key via ``params`` lets requests handle the query-string
    # encoding; the resulting URL still ends in ``?key=<API_KEY>``.
    response = get(url, params={'key': API_KEY}, timeout=timeout)
    soup = BeautifulSoup(response.content, 'lxml')
    return list(chain.from_iterable(
        node.text.split(', ') for node in soup.find_all('syn')))
def get_simple_synonyms(word):
    """Return the synonyms of *word* that are in the module-level ``words`` list.

    Args:
        word: The word to look up; passed straight to ``get_synonyms``.

    Returns:
        A list of those synonyms from ``get_synonyms(word)`` that also appear
        in ``words``, in the order ``get_synonyms`` returned them (duplicates
        are kept).
    """
    # Build the lookup set once so each membership test is O(1) instead of a
    # linear scan over the whole word list.
    allowed = set(words)
    return [synonym for synonym in get_synonyms(word) if synonym in allowed]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment