Skip to content

Instantly share code, notes, and snippets.

@niyumard
Last active October 27, 2020 14:53
Show Gist options
  • Save niyumard/eed1799119bb3022604322cefdc39489 to your computer and use it in GitHub Desktop.
Save niyumard/eed1799119bb3022604322cefdc39489 to your computer and use it in GitHub Desktop.
This script looks up Esperanto words on the lernu.net website. It is made specifically for use via GoldenDict. Usage: python lernulookup.py %GDWORD%
#!/usr/bin/python
import requests,re, sys
from bs4 import BeautifulSoup
URL = 'https://lernu.net/en/vortaro'

# Dictionaries queried for every word, in the order their results are printed.
DICTIONARIES = ("eo|fr", "eo|en", "eo|fa")

# Seconds to wait for each HTTP request; without a timeout a stalled
# connection would block the calling program forever.
REQUEST_TIMEOUT = 15

# The page embeds its CSRF token in inline script as: csrfToken: '<value>'
CSRF_TOKEN_PATTERN = re.compile(r"csrfToken: '(.*?)'")


def extract_csrf_token(page_text):
    """Return the CSRF token embedded in *page_text*, or None if absent."""
    found = CSRF_TOKEN_PATTERN.search(page_text)
    return found.group(1) if found else None


def build_headers(phpsessid, csrftoken):
    """Return the request headers sent with every dictionary lookup.

    The session id and CSRF token are also spelled out in an explicit
    ``Cookie`` header, in addition to being passed as request cookies.
    """
    cookie = "PHPSESSID=" + phpsessid + "; YII_CSRF_TOKEN=" + csrftoken + "; lang=en"
    return {
        'Accept': '*/*',
        # NOTE(review): "br" is advertised here; confirm the HTTP stack in use
        # can actually decode Brotli responses, otherwise parsing will fail.
        "Accept-Encoding": "gzip, deflate, br",
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Cookie': cookie,
        'DNT': '1',
        'Host': 'lernu.net',
        'Origin': 'https://lernu.net',
        'Sec-GPC': '1',
        'Referer': URL,
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0",
        'X-Requested-With': 'XMLHttpRequest',
    }


def open_lernu_session(client):
    """Load the dictionary page so the server sets its session cookies.

    Returns a ``(phpsessid, csrftoken)`` tuple, or None when the request
    fails, a required cookie is missing, or no token is found in the page.
    """
    try:
        page = client.get(URL, timeout=REQUEST_TIMEOUT)  # sets cookies
    except requests.RequestException:
        return None
    if 'YII_CSRF_TOKEN' not in client.cookies or 'PHPSESSID' not in client.cookies:
        return None
    # The token is read from the page text; the raw YII_CSRF_TOKEN cookie
    # value is deliberately not used as the token.
    csrftoken = extract_csrf_token(page.text)
    if csrftoken is None:
        return None
    return client.cookies['PHPSESSID'], csrftoken


def describe_results(html):
    """Return what should be printed for one lookup response.

    That is the first ``<ul>`` inside the ``dictionary-search-results``
    element, or one of the script's failure messages when the element or
    list is missing, or when the list is marked ``class="empty"``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    container = soup.find(id="dictionary-search-results")
    if container is None:
        return "Failed."
    lists = container.find_all('ul')
    if not lists:
        return "Failed."
    if 'class="empty"' in str(lists):
        return "Erorr parsing website text."
    return lists[0]


def main(argv=None):
    """Look up ``argv[1]`` in every configured dictionary and print results.

    Args:
        argv: Command-line arguments including the program name; defaults
            to ``sys.argv``.

    Returns:
        Process exit status: 0 after the lookups ran, 1 when no session
        could be established, 2 when no word was supplied.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Checked before any network traffic so a bad invocation fails fast.
        print("Usage: lernulookup.py WORD", file=sys.stderr)
        return 2
    word = argv[1]

    with requests.session() as client:
        credentials = open_lernu_session(client)
        if credentials is None:
            print("Seesion Failure")
            return 1
        phpsessid, csrftoken = credentials

        headers = build_headers(phpsessid, csrftoken)
        request_cookies = {
            'lang': 'en',
            'PHPSESSID': phpsessid,
            'YII_CSRF_TOKEN': csrftoken,
        }

        for dictionary in DICTIONARIES:
            form = {
                "YII_CSRF_TOKEN": csrftoken,
                'DictWords[word]': word,
                "DictWords[dictionary]": dictionary,
            }
            try:
                response = client.post(
                    URL,
                    data=form,
                    headers=headers,
                    cookies=request_cookies,
                    timeout=REQUEST_TIMEOUT,
                )
            except requests.RequestException:
                print("Failed.")
                continue
            if response.status_code != 200:
                # Do not try to parse an error page as search results.
                print("Failed connecting to the server.")
                continue
            print(describe_results(response.text))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment