Skip to content

Instantly share code, notes, and snippets.

Last active March 7, 2023 22:24
Show Gist options
  • Save JorjMcKie/fc7774ec884fd930ee1d to your computer and use it in GitHub Desktop.
Save JorjMcKie/fc7774ec884fd930ee1d to your computer and use it in GitHub Desktop.
Accessing dictionary from Python
#!/usr/bin/env python
# -*- coding: latin-1 -*-
# Query the dictionary from within Python.
# This is based on an equivalent script for German/English by:
# Copyright (C) 2015 Ian Denhardt <>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Usage:
# import LeoAccess as leo
# ret = leo.search("some string")
# 'ret' will be {} if nothing found or any error, or contain a dictionary with a
# variable number of 'section_names' (see below) as keys.
# The value of each key is a list of sub-dictionaries of word pairs
# {"sl": "source", "de": "german"}.
# When required, the dictionary 'sn_de' can be used to translate section_names
# into German.
# Dependencies:
# requests, lxml, io
import requests
from lxml import etree
from io import StringIO
# Constants
# Module-level language settings and the LEO request URL template.
# Note: search() binds its own local 'sl' from its 'lang' argument, so the
# global below only records the default and is not read by search().
sl = 'es' # default source language (Spanish - Español)
# Currently (Feb 2016), the following languages are available in LEO:
# English (en), tested, 800 k-entries
# French (fr), tested, 250 k-entries
# Spanish (es), tested, 200 k-entries
# Italian (it), tested, 180 k-entries
# Chinese (ch), tested, 186 k-entries
# Russian (ru), tested, 272 k-entries
# Portuguese (pt), tested, 82 k-entries
# Polish (pl), tested, 59 k-entries
tl = 'de' # target language (German). Actually a constant:
# LEO is a German company
# NOTE(review): search() evaluates `uri % (sl, tl)`, so this must be a format
# string with two %s placeholders (source language, target language). The
# value is empty here, which makes that expression raise TypeError - restore
# the real LEO URL template before use.
uri = '' # LEO uri
section_names = (
# for translating section names to target language (German) - not used here
sn_de ={'subst':"Substantiv",
def _get_text(elt):
    """Return all text contained in *elt*, concatenated in document order.

    Walks the element tree depth-first. For every element visited
    (including *elt* itself) it emits the element's ``text``, then the text
    of each child recursively, then the element's ``tail``. Markup is
    dropped; only character data is kept.

    elt = an element exposing ``text``, ``tail`` and iteration over its
          children (the lxml / ElementTree element interface)

    Returns the collected text as a single string ('' if there is none).
    """
    buf = StringIO()

    def _helper(_elt):
        # Character data between the opening tag and the first child.
        if _elt.text is not None:
            buf.write(_elt.text)
        # Descend so nested markup (e.g. <b>, <small>) contributes its text.
        for child in _elt:
            _helper(child)
        # Character data that follows this element's closing tag.
        if _elt.tail is not None:
            buf.write(_elt.tail)

    _helper(elt)
    return buf.getvalue()
def search(term, lang='es', timeout=None):
    '''Look up *term* in the online dictionary and return the word pairs found.

    term = search term
    lang = source language, one of en, es, it, fr, pt, ch, ru, pl
    timeout = None or max. number of seconds to wait for response

    Returns a dictionary keyed by section name. Only sections present in
    the response page appear as keys. Each value is a list of dictionaries
    with two entries: the source-language code (the value of 'lang') mapped
    to the source text, and the target-language code mapped to the
    translation. Returns {} if the HTTP status is not OK.

    Exceptions raised by requests.get (e.g. on timeout or connection
    failure) are not caught here and propagate to the caller.
    '''
    sl = lang
    # 'uri' is expected to be a format string with two %s placeholders.
    url = uri % (sl, tl)
    resp = requests.get(url, params={'search': term}, timeout=timeout)
    ret = {}
    if resp.status_code != requests.codes.ok:
        return ret
    p = etree.HTMLParser()
    html = etree.parse(StringIO(resp.text), p)
    for section_name in section_names:
        section = html.find(".//div[@id='section-%s']" % section_name)
        if section is None:
            # Section not on the page: leave it out of the result.
            continue
        pairs = []
        # Every source-language cell marks one result row.
        for r_sl in section.findall(".//td[@lang='%s']" % (sl,)):
            # The translation is the sibling cell in the same row.
            r_tl = r_sl.find("./../td[@lang='%s']" % (tl,))
            if r_tl is None:
                # Row without a target-language cell: nothing to pair up.
                continue
            pairs.append({
                sl: _get_text(r_sl).strip(),
                tl: _get_text(r_tl).strip(),
            })
        ret[section_name] = pairs
    return ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment