Skip to content

Instantly share code, notes, and snippets.

@xi
Last active February 9, 2018 14:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xi/8a2774c2e82682fa34a4cd1e621fcf5a to your computer and use it in GitHub Desktop.
Save xi/8a2774c2e82682fa34a4cd1e621fcf5a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""Get translations from leo.org on the command line."""
import argparse
import itertools
import collections
import xml.etree.ElementTree as ET
import requests
# Version string reported by the ``--version`` command line option.
__version__ = '0.0.0'
# Dictionary codes accepted by ``--vocab``; the chosen code is inserted into
# the request URL by get_data(). The first entry is the default.
# NOTE(review): presumably each code is a language pair with German
# (e.g. 'ende' = English/German) -- confirm against the leo.org API.
VOCABS = ['ende', 'frde', 'esde', 'itde', 'chde', 'rude', 'ptde', 'plde']
def get_text(node):
    """Return the concatenated text of *node* and all of its descendants.

    Args:
        node: An ElementTree element, or ``None`` (which is what
            ``Element.find()`` returns when nothing matches).

    Returns:
        All text inside the element in document order, or an empty string
        when *node* is ``None``.
    """
    if node is None:
        # A failed lookup should yield an empty cell, not an AttributeError.
        return ''
    return ''.join(node.itertext())
def get_data(vocab, query):
    """Query leo.org and return the translations grouped by section.

    Args:
        vocab: Dictionary code that is inserted into the request URL
            (e.g. ``'ende'``).
        query: Search term, sent as the ``search`` query parameter.

    Returns:
        An ``OrderedDict`` mapping each ``<section>``'s ``sctTitle``
        attribute to a list of entries. Every entry is a list holding the
        text of the ``<repr>`` element of each ``<side>``, in document
        order; a side without ``<repr>`` contributes an empty string.

    Raises:
        requests.RequestException: On connection problems, a timeout or an
            HTTP error status.
        xml.etree.ElementTree.ParseError: If the response is not valid XML.
    """
    url = 'https://pda.leo.org/dictQuery/m-vocab/%s/query.xml' % vocab
    # Without a timeout a stalled connection would block forever.
    r = requests.get(url, params={'search': query}, timeout=30)
    # Fail early on 4xx/5xx instead of feeding an error page to the parser.
    r.raise_for_status()
    # Parse the raw bytes so the XML parser applies the encoding declared in
    # the document instead of relying on requests' charset guess for r.text.
    root = ET.fromstring(r.content)

    sections = collections.OrderedDict()
    for section in root.iter('section'):
        # setdefault keeps entries of an earlier section with the same title
        # instead of silently replacing them.
        entries = sections.setdefault(section.get('sctTitle'), [])
        for entry in section.iter('entry'):
            sides = []
            for side in entry.iter('side'):
                repr_node = side.find('.//repr')
                sides.append(
                    '' if repr_node is None else get_text(repr_node))
            entries.append(sides)
    return sections
def get_max_len(data):
    """Return the length of the longest side text found in *data*.

    Args:
        data: Mapping of section title to a list of entries, where every
            entry is a sequence of strings (the structure returned by
            ``get_data``).

    Returns:
        The largest ``len(side)`` over all sides of all entries, or ``0``
        when there are no sides at all.
    """
    lengths = (
        len(side)
        for section in data.values()
        for entry in section
        for side in entry
    )
    # default=0 covers empty data, empty sections and empty entries.
    return max(lengths, default=0)
def wrap(s, width=80, indent=0):
    """Greedily wrap *s* into lines of at most *width* characters.

    The text is split on whitespace and the words are re-joined with single
    spaces. Words are never broken, so a word longer than *width* ends up
    on a line of its own that exceeds the limit.

    Args:
        s: Text to wrap.
        width: Maximum line length, including the continuation indent.
        indent: Number of spaces prepended to every line after the first.

    Returns:
        A list of lines; empty when *s* contains no words.
    """
    prefix = ' ' * indent
    lines = []
    for word in s.split():
        if not lines:
            # The first line is never indented.
            lines.append(word)
        elif len(lines[-1]) + 1 + len(word) > width:
            # Word plus the joining space does not fit: start a new line.
            lines.append(prefix + word)
        else:
            lines[-1] += ' ' + word
    return lines
def iter_column_lines(a_raw, b_raw, width=80):
    """Yield the output lines for two texts laid out side by side.

    Both texts are wrapped with ``wrap`` to *width* characters, using a
    continuation indent of two spaces. For every row the left cell is
    padded to *width*, followed by a two-space gap and the right cell;
    trailing whitespace is stripped. The shorter column is filled with
    empty cells.

    Args:
        a_raw: Text of the left column.
        b_raw: Text of the right column.
        width: Width of a single column.

    Yields:
        One string per output row.
    """
    left_lines = wrap(a_raw, width=width, indent=2)
    right_lines = wrap(b_raw, width=width, indent=2)
    rows = itertools.zip_longest(left_lines, right_lines, fillvalue='')
    for left, right in rows:
        # ljust never truncates, so an over-long left cell just pushes the
        # right cell further out while the two-space gap is kept.
        row = left.ljust(width) + '  ' + right
        yield row.rstrip()
def print_data(data, width=80):
    """Print all sections of *data* as two-column tables on stdout.

    The column width is half of ``width - 4``, but never wider than the
    longest side text in *data*. Each section is printed as its title,
    followed by the rows of its entries and one empty line.

    Args:
        data: Mapping of section title to a list of entries as returned by
            ``get_data``; every entry is unpacked into the two positional
            arguments of ``iter_column_lines``.
        width: Total output width the layout is derived from.
    """
    longest = get_max_len(data)
    half_width = int((width - 4) / 2)
    column_width = min(longest, half_width)

    for heading, entries in data.items():
        print(heading)
        for sides in entries:
            for row in iter_column_lines(*sides, width=column_width):
                print(' ' + row)
        print('')
def parse_args():
    """Parse the command line and return the resulting namespace.

    Defines the positional ``query`` argument and the options
    ``--version``/``-V``, ``--vocab``/``-v`` (one of ``VOCABS``, defaulting
    to its first entry) and ``--width``/``-w`` (integer, default 80).

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    # (names, keyword arguments) for each argument, in registration order.
    specs = (
        (('--version', '-V'), {'action': 'version', 'version': __version__}),
        (('query',), {}),
        (('--vocab', '-v'), {'choices': VOCABS, 'default': VOCABS[0]}),
        (('--width', '-w'), {'type': int, 'default': 80}),
    )
    parser = argparse.ArgumentParser(description=__doc__)
    for names, options in specs:
        parser.add_argument(*names, **options)
    return parser.parse_args()
def main():
    """Run the command line interface: parse arguments, fetch, print.

    Raises:
        SystemExit: With an error message (exit status 1) when the request
            fails or the response cannot be parsed, so that the user gets a
            short message instead of a traceback.
    """
    args = parse_args()
    try:
        data = get_data(args.vocab, args.query)
    except requests.RequestException as err:
        raise SystemExit('error: request to leo.org failed: %s' % err)
    except ET.ParseError as err:
        raise SystemExit(
            'error: could not parse the response from leo.org: %s' % err)
    print_data(data, args.width)
# Run the command line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment