Created
November 1, 2017 19:18
-
-
Save d33tah/4f28a4506f25921176d3e62f7ac9e247 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import errno
import os
import random
import subprocess

import lxml.html
import requests
def digit_to_nondigit(x):
    """Return the alternative spelling that Wiktionary lists for entry *x*.

    Fetches ``https://en.wiktionary.org/wiki/<x>`` and returns the text of
    the first ``span/i/a`` link located under an element whose text contains
    "Alternative spelling of".

    Raises IndexError when the page contains no such link.
    """
    response = requests.get('https://en.wiktionary.org/wiki/' + x)
    tree = lxml.html.fromstring(response.text)
    links = tree.xpath('//* [contains(.,"Alternative spelling of")]/span/i/a')
    first_link = links[0]
    return first_link.text
def download_link(x):
    """Return the direct download URL for the Wikimedia Commons file *x*.

    Loads the ``File:<x>`` description page and returns the ``href`` of the
    first ``<a class="internal">`` element found on it.

    Raises IndexError when the page has no such anchor.
    """
    page_url = 'https://commons.wikimedia.org/w/index.php?title=File:' + x
    response = requests.get(page_url)
    tree = lxml.html.fromstring(response.text)
    anchors = tree.xpath('//a [@class="internal"]')
    return anchors[0].get('href')
def get_zh_filenames():
    """Stream the ``{{audio|zh-...}}`` lines of the enwiktionary dump.

    Starts a shell pipeline (curl | bzcat | grep | egrep) over the
    2017-10-20 dump and returns the pipeline's stdout pipe; iterating the
    returned object yields the matching lines one at a time. The child
    process is not waited on here.
    """
    # Shortcut left by the original author (presumably a saved listing):
    # return open('zh-filenames.txt').readlines()
    dump_url = 'https://dumps.wikimedia.org/enwiktionary/20171020/enwiktionary-20171020-pages-meta-current.xml.bz2'
    pipeline = "curl %s | bzcat | grep '{{audio|zh-' -i | egrep 'lang=zh|lang=cmn'"
    proc = subprocess.Popen(
        pipeline % dump_url,
        shell=True,
        stdout=subprocess.PIPE,
    )
    return proc.stdout
def download_everything():
    """Download every audio file listed by get_zh_filenames() into ``out/``.

    Each line is split on ``|`` and its second field is taken as the Commons
    file name. The local name is that file name without its extension and
    without a leading ``zh-`` prefix (any letter case); if it contains an
    ASCII digit it is replaced by ``digit_to_nondigit(name)``. Files that
    already exist under ``out/`` are skipped; the rest are fetched with
    ``wget``.

    Raises OSError if ``out`` cannot be created for any reason other than
    already existing. Errors from digit_to_nondigit() / download_link()
    propagate unchanged.
    """
    try:
        os.mkdir('out')
    except OSError as exc:
        # Only "already exists" is expected; permission problems and the
        # like must not be silently ignored.
        if exc.errno != errno.EEXIST:
            raise
    for line in get_zh_filenames():
        if not isinstance(line, str):
            # Under Python 3 the pipe yields bytes; Python 2 yields str.
            line = line.decode('utf-8')
        fn = line.split('|')[1].strip()
        display_name = os.path.splitext(fn)[0]
        # The grep in get_zh_filenames() is case-insensitive, so the prefix
        # may be spelled 'Zh-' or 'zh-'; strip it only at the start.
        if display_name[:3].lower() == 'zh-':
            display_name = display_name[3:]
        if any(digit in display_name for digit in '0123456789'):
            display_name = digit_to_nondigit(display_name)
        out_path = 'out/' + display_name
        if os.path.exists(out_path):
            continue
        url = download_link(fn)
        status = subprocess.call(['wget', '--no-verbose', url, '-O', out_path])
        if status != 0 and os.path.exists(out_path):
            # wget -O creates the target before downloading; remove the stub
            # so a failed download is retried on the next run instead of
            # being mistaken for a finished one.
            os.remove(out_path)
def generate_challenge():
    """Pick a random recording from ``out/`` plus up to 15 distractors.

    Returns:
        A ``(w, options)`` tuple: ``w`` is the chosen file name and
        ``options`` is a list containing ``w`` exactly once together with up
        to 15 other file names from ``out/``, in random order.

    Raises:
        IndexError: if ``out/`` is empty (from ``random.choice``).
    """
    names = os.listdir('out')
    w = random.choice(names)
    distractors = [name for name in names if name != w]
    # Sampling from the other names terminates even when fewer than 16
    # files exist; the earlier shuffle-until-unique loop never finished then.
    options = random.sample(distractors, min(15, len(distractors)))
    options.append(w)
    # Shuffle so the correct answer is not always the last entry.
    random.shuffle(options)
    return w, options
def ask_one():
    """Run one quiz round: play a recording until the user picks it.

    Obtains ``(w, options)`` from generate_challenge(), then repeatedly
    clears the screen, prints the numbered options, plays ``out/<w>`` with
    mplayer and reads a number from standard input. The round ends when the
    entered number is the 1-based position of ``w``; any other entry
    (including 0 or non-numeric text) replays the recording.

    Raises EOFError if standard input is closed.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    w, options = generate_challenge()
    correct = options.index(w) + 1
    given = None
    # Send mplayer's output to the null device instead of PIPE: nobody reads
    # the pipes, so a chatty child would block once the pipe buffer filled.
    with open(os.devnull, 'w') as devnull:
        while given != correct:
            subprocess.call('clear')
            for n, option in enumerate(options, 1):
                print('%d\t%s' % (n, option))
            subprocess.call(['mplayer', 'out/' + w],
                            stderr=devnull, stdout=devnull)
            try:
                given = int(read_line().strip())
            except ValueError:
                # Not a number: treat it like a wrong answer and replay.
                given = None
# Script body: fetch any recordings that are still missing, then quiz the
# user indefinitely. The loop has no exit condition, so Ctrl-C or end of
# input (Ctrl-D) is the intended way out; leave quietly instead of dumping
# a traceback.
try:
    download_everything()
    while True:
        ask_one()
except (KeyboardInterrupt, EOFError):
    print('')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment