Skip to content

Instantly share code, notes, and snippets.

@skychan
Created August 31, 2017 06:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skychan/45c2b56efee927420d058e2c453e88b6 to your computer and use it in GitHub Desktop.
Save skychan/45c2b56efee927420d058e2c453e88b6 to your computer and use it in GitHub Desktop.
Grab definitions from Youdao Dict and generate a word book for review.
import json
import os
import re
import sys
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from bs4 import CData
# Base search URL of the Youdao web dictionary; query() appends the word to
# look up directly after the trailing `q=`.
web_url = u'http://dict.youdao.com/search?keyfrom=dict.top&q='
def readfile(s):
    """Read the text file at path *s* and return its lines.

    Args:
        s: Path of the file to read.

    Returns:
        A list with one string per line; trailing newline characters are
        kept, so callers should strip them if needed.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # encoding, so non-ASCII words are read the same way on every system.
    # The ``with`` block closes the file; no explicit close() is required.
    with open(s, 'r', encoding='utf-8') as f:
        return f.readlines()
def query(word):
    """Look up *word* on the Youdao web dictionary and return its definitions.

    The search page is fetched over HTTP and parsed; the text of every
    ``<li>`` inside the ``trans-container`` element of the ``phrsListTab``
    section (itself inside ``results-contents``) is collected.

    Args:
        word: The word or phrase to look up.

    Returns:
        The definitions joined with newlines, or an empty string when the
        page contains no translation list for *word*.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Percent-encode the word so spaces, '&', '#' and non-ASCII characters
    # cannot corrupt the query string.
    response = requests.get(web_url + quote(word, safe=''), timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    # Each lookup returns None when the expected element is missing (e.g. the
    # word has no dictionary entry); stop early instead of failing with an
    # AttributeError on None.
    root = soup.find(id='results-contents')
    phrase_tab = root.find(id='phrsListTab') if root is not None else None
    trans = (phrase_tab.find(class_='trans-container')
             if phrase_tab is not None else None)
    if trans is None:
        return ''

    # get_text() also handles <li> elements with nested markup, for which
    # .string is None and would make the join raise TypeError.
    return '\n'.join(li.get_text() for li in trans.find_all('li'))
def addword(root, words):
    """Append an ``<item>`` entry for *words* to the word book in *root*.

    The entry holds three children, in this order: ``<word>`` with the word
    itself, ``<trans>`` with the result of ``query(words)`` wrapped in a
    CDATA section, and ``<tags>`` with the fixed label ``TOEFL``.

    Args:
        root: The BeautifulSoup document whose ``wordbook`` element receives
            the new entry.
        words: The word to look up and store.
    """
    definition = CData(query(words))
    entry = root.new_tag("item")
    children = (
        ("word", words),
        ("trans", definition),
        ("tags", "TOEFL"),
    )
    for tag_name, content in children:
        child = root.new_tag(tag_name)
        child.string = content
        entry.append(child)
    root.wordbook.append(entry)
if __name__ == "__main__":
    # Command line: <script> WORDLIST OUTPUT_XML
    #   WORDLIST   - text file with one word per line
    #   OUTPUT_XML - file that receives the generated <wordbook> element
    if len(sys.argv) < 3:
        sys.exit("usage: %s WORDLIST OUTPUT_XML" % sys.argv[0])

    root = BeautifulSoup("<wordbook></wordbook>", "xml")
    for line in readfile(sys.argv[1]):
        word = line.strip()
        # Skip blank lines so no dictionary lookup is made for an empty word.
        if word:
            addword(root, word)

    # The ``with`` block closes the file; no explicit close() is required.
    with open(sys.argv[2], 'w', encoding='utf-8') as f:
        f.write(str(root.wordbook))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment