Skip to content

Instantly share code, notes, and snippets.

@emptyhua
Created January 4, 2019 02:41
Show Gist options
  • Save emptyhua/b1285207ce460eff349352ad173c64d5 to your computer and use it in GitHub Desktop.
Save emptyhua/b1285207ce460eff349352ad173c64d5 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import requests
import re
from lxml import etree
import genanki
import tempfile
import sys
import os
import io
# Captures (href, link text) from every <a class="word" ...> anchor.
FIND_WORD = re.compile(
    r'<a class="word" href="([^"]+)">'
    r'([^<]+)</a>'
)
# Captures (span text, data-url) from a "word-spell" span that is directly
# followed by an empty "word-spell-audio" span.
FIND_SPELL = re.compile(
    r'<span class="word-spell">([^<]+)</span>'
    r'<span class="word-spell-audio" data-url="([^"]+)"></span>'
)
# Stylesheet handed to the note model (see the css= argument of the model).
css = """
.card {
color:#43c18a;
background:#000;
font-family: "Courier New";
text-align:center;
font-size:20px;
font-weight:bold;
}
.word {font-size:36px;}
.spell {font-size:24px;}
.detail {text-align:left;}
"""

# Question-side template: the Word and Spell fields in their own styled divs,
# followed by the Mp3 field.
qfmt = (
    '\n'
    '<div class="word">{{Word}} </div>\n'
    '<div class="spell">{{Spell}}</div>\n'
    '{{Mp3}}\n'
)

# Answer-side template: the front side, a divider, then the Explan field
# inside the left-aligned "detail" div.
afmt = (
    '\n'
    '{{FrontSide}}\n'
    '<hr id="answer">\n'
    '<div class="detail">\n'
    '{{Explan}}\n'
    '</div>\n'
)
# Note model: four fields (in this order) rendered through a single card
# template that uses the qfmt/afmt strings and the shared stylesheet.
my_model = genanki.Model(
    model_id=778899,
    name='Computer English',
    fields=[
        {'name': field_name}
        for field_name in ('Word', 'Spell', 'Explan', 'Mp3')
    ],
    templates=[dict(name='Card1', qfmt=qfmt, afmt=afmt)],
    css=css,
)

# Deck that collects every generated note.
my_deck = genanki.Deck(deck_id=77889901, name='Computer English')
# Site that serves both the word-list pages and the per-word detail pages.
KOOLEARN_BASE_URL = 'https://www.koolearn.com'
# Paginated word list; %d is the 1-based page number.
WORD_LIST_URL = KOOLEARN_BASE_URL + '/dict/tag_1953_%d.html'
WORD_LIST_PAGES = 7
# Pronunciation audio endpoint, queried with audio=<word>&type=1.
VOICE_URL = 'http://dict.youdao.com/dictvoice'
# Seconds to wait on a server; without a timeout requests.get() may block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 30


def _fetch(url, **kwargs):
    """Return the response for a GET of *url*, or None if the request failed.

    Connection errors, timeouts and non-2xx statuses are reported on stdout
    rather than raised, so one bad page does not abort the whole run.
    Extra keyword arguments are passed through to ``requests.get``.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT, **kwargs)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('ERROR:request failed for', url, '-', exc)
        return None
    return response


def _download_audio(word, mp3):
    """Make sure the audio file *mp3* for *word* exists in the working dir.

    A file that is already present is reused, so re-runs do not download it
    again. Returns True when the file is available, False when the download
    failed (in which case nothing is written).
    """
    if os.path.isfile(mp3):
        return True
    # Passing the word via params lets requests escape characters such as
    # '&', '+' or '#' that would otherwise corrupt the query string.
    response = _fetch(VOICE_URL, params={'audio': word, 'type': 1},
                      allow_redirects=True)
    if response is None:
        return False
    with open(mp3, 'wb') as audio_file:
        audio_file.write(response.content)
    return True


def _add_word(word, link, sounds):
    """Scrape the detail page at *link* and add a note for *word* to my_deck.

    *sounds* is the running list of media file names; the word's mp3 name is
    appended to it when the audio file is available. Nothing is added when
    the page cannot be fetched or lacks the expected spell/detail markup.
    """
    page = _fetch(KOOLEARN_BASE_URL + link)
    if page is None:
        return

    spell_matches = FIND_SPELL.findall(page.text)
    # The note uses the second spell match, so at least two must be present.
    if len(spell_matches) < 2:
        print("ERROR:can't find spell for", word)
        return

    tree = etree.HTML(page.text)
    detail_divs = tree.xpath("//div[@class='details-content']")
    # Likewise the second "details-content" div is the one that is exported.
    if len(detail_divs) < 2:
        print("ERROR:can't find detail for", word)
        return
    detail = ''.join(
        etree.tostring(child).decode('utf-8')
        for child in detail_divs[1].iterchildren()
    )

    # A path separator inside the word would point outside the working
    # directory (or at a missing sub-directory), so replace it.
    mp3 = re.sub(r'[\\/]', '_', word) + '.mp3'
    if _download_audio(word, mp3):
        sounds.append(mp3)
        sound_field = '[sound:' + mp3 + ']'
    else:
        # Keep the note but without audio; listing a file that does not
        # exist would make packaging fail later.
        sound_field = ''

    my_note = genanki.Note(
        model=my_model,
        sort_field=word,
        fields=[word, spell_matches[1][0], detail, sound_field],
    )
    my_deck.add_note(my_note)


def main():
    """Build ``output.apkg`` in the directory named by the first CLI argument.

    Walks the word-list pages, adds one note (plus its audio file) per word
    found, and writes the resulting package together with the downloaded
    media files. Exits with a usage message when no directory is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: %s WORK_DIR' % os.path.basename(sys.argv[0]))
    os.chdir(sys.argv[1])

    sounds = []
    for page_number in range(1, WORD_LIST_PAGES + 1):
        listing = _fetch(WORD_LIST_URL % page_number)
        if listing is None:
            continue
        for link, word in FIND_WORD.findall(listing.text):
            print('add', word)
            _add_word(word, link, sounds)

    my_package = genanki.Package(my_deck)
    # A word listed more than once must not put its file in the package twice.
    my_package.media_files = list(dict.fromkeys(sounds))
    my_package.write_to_file('output.apkg')


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment