Skip to content

Instantly share code, notes, and snippets.

@jubnzv
Last active December 16, 2018 18:35
Show Gist options
  • Save jubnzv/4e6f811bc7b39e3ab55e504c71c9bb03 to your computer and use it in GitHub Desktop.
Save jubnzv/4e6f811bc7b39e3ab55e504c71c9bb03 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# Usage example:
#
# printf "hello\nworld\n" > words.txt
# ./dict_generator.py > dict.md
# pandoc -s dict.md -o dict.doc
#
import bs4
import time
import requests
# Path of the word list read by the script entry point; only the first
# whitespace-separated token of each line is used.
INPUT_FNAME = "words.txt"
def yandex_translate(word, *, timeout=10.0):
    """Translate *word* from English to Russian with the Yandex Translate API.

    Sends a single POST request to the v1.5 ``tr.json/translate`` endpoint
    with ``lang=en-ru`` and returns the first entry of the ``text`` list found
    in the JSON reply.

    Args:
        word: Text to translate.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The first translation as a string, or ``""`` when the reply carries no
        non-empty ``text`` list.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx HTTP status (via ``raise_for_status``).
        ValueError: If the response body is not valid JSON.
    """
    data = {
        "text": word,
        "format": "plain",
        "lang": "en-ru",
        # Stolen from: https://github.com/dveselov/python-yandex-translate/blob/master/yandex_translate/__init__.py
        # NOTE(review): hard-coded third-party API key; consider supplying it via configuration instead.
        "key": "trnsl.1.1.20130421T140201Z.323e508a33e9d84b.f1e0d9ca9bcd0a00b0ef71d82e6cf4158183d09e",
    }
    response = requests.post(
        "https://translate.yandex.net/api/v1.5/tr.json/translate",
        params=data,
        proxies=None,
        timeout=timeout,
    )
    # Surface HTTP failures explicitly instead of failing later with a
    # TypeError when the error payload has no "text" entry.
    response.raise_for_status()

    translations = response.json().get("text")
    if not translations:
        return ""
    return translations[0]
def _first_text(soup, name, css_class):
    """Return the text of the first ``name`` tag with class ``css_class``, or None if absent."""
    node = soup.find(name, class_=css_class)
    return None if node is None else node.get_text()


def oxford(word, session):
    """Look *word* up on en.oxforddictionaries.com and format it as table cells.

    Fetches the dictionary search page through *session*, parses it with
    BeautifulSoup and extracts the text of the first matching element for each
    of: ``span.phoneticspelling``, ``span.ind``, ``div.exs`` and ``div.ex``.
    Any element that is missing yields an empty cell.

    Args:
        word: The word to look up. It is passed as a query parameter so that
            ``requests`` URL-encodes it.
        session: A ``requests.Session``-like object providing ``get``.

    Returns:
        The string ``" {pron} | {meaning} | {synonym} | {example} "`` (the
        pronunciation wrapped in ``*`` for Markdown italics), or ``None`` when
        *word* or *session* is falsy.
    """
    if not (word and session):
        return None

    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": "https://en.oxforddictionaries.com/",
        "Host": "en.oxforddictionaries.com",
    }
    # Let requests build and escape the query string; interpolating the raw
    # word into the URL breaks on characters such as '&', '#' or spaces.
    response = session.get(
        "https://en.oxforddictionaries.com/search",
        params={"filter": "dictionary", "query": word},
        headers=headers,
        allow_redirects=True,
    )
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    pron = _first_text(soup, "span", "phoneticspelling")
    pron = "" if pron is None else f"*{pron}*"  # italic

    meaning = _first_text(soup, "span", "ind") or ""

    # NOTE(review): the "exs" element is emitted in the Synonyms column;
    # confirm against the page markup that it really holds synonyms.
    synonym = _first_text(soup, "div", "exs") or ""

    raw_example = _first_text(soup, "div", "ex")
    if raw_example is not None and len(raw_example) >= 3:
        # Drop the first two characters and the last one, capitalise the new
        # first letter and terminate with a full stop. Presumably this strips
        # surrounding quote marks -- confirm against the page markup.
        example = raw_example[2].upper() + raw_example[3:-1] + "."
    else:
        example = ""

    # A body starting with "Retry" is treated as a throttling notice: pause so
    # the next request is delayed. The current word is NOT fetched again.
    if "Retry" in response.text[0:30]:
        time.sleep(5)

    return f" {pron} | {meaning} | {synonym} | {example} "
def read_words(path):
    """Return the first whitespace-separated token of every non-blank line in *path*.

    Blank and whitespace-only lines are skipped instead of raising
    ``IndexError``.
    """
    with open(path) as f:
        return [tokens[0] for tokens in map(str.split, f) if tokens]


def main():
    """Print a Markdown table with one translated and defined row per input word."""
    words = read_words(INPUT_FNAME)

    # The context manager closes the session's connections on exit.
    with requests.Session() as s:
        s.headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36"}
        # Hit the landing page before searching; presumably this collects
        # cookies the search endpoint expects -- confirm.
        s.get("https://en.oxforddictionaries.com/")

        print("№ | Word | Translation | Pronounce | Meaning | Synonyms | Example ")
        print("--|------|-------------|-----------|---------|----------|---------")
        for i, w in enumerate(words, start=1):
            print(f"{i} | {w} | {yandex_translate(w)} | {oxford(w, s)} |")


if __name__ == "__main__":
    main()
# vim:foldmethod=marker:foldenable:tw=120
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment