Created
August 31, 2017 06:41
-
-
Save skychan/45c2b56efee927420d058e2c453e88b6 to your computer and use it in GitHub Desktop.
Grab definitions from the Youdao dictionary and generate a word book for review
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import os
import re
import sys
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
from bs4 import CData
# Base search URL of the Youdao dictionary; the word to look up is appended
# directly after the trailing ``q=``.
web_url = u'http://dict.youdao.com/search?keyfrom=dict.top&q='
def readfile(s):
    """Return all lines of the text file at path *s*.

    The file is decoded as UTF-8, matching the encoding this script uses
    when it writes the word book. Line terminators are kept, so callers
    are expected to strip each line themselves.

    Args:
        s: path of the file to read.

    Returns:
        A list with one string per line (empty list for an empty file).
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(s, 'r', encoding='utf-8') as f:
        return f.readlines()
def query(word, timeout=10):
    """Fetch the definitions of *word* from the Youdao dictionary page.

    The result page is parsed for the element with id ``results-contents``,
    then the ``phrsListTab`` element inside it, then its ``trans-container``
    child; the text of every ``<li>`` found there is one definition line.

    Args:
        word: the word to look up.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        The definition lines joined with newlines, or an empty string when
        the page does not contain the expected definition block.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    # Percent-encode the word so characters such as '&', '#' or spaces
    # cannot corrupt the query string.
    html = requests.get(web_url + quote(word, safe=''), timeout=timeout)
    # Surface HTTP failures instead of parsing an error page.
    html.raise_for_status()
    soup = BeautifulSoup(html.text, "lxml")

    # Any of these lookups returns None when the element is absent (e.g. the
    # word is unknown); stop early rather than calling .find() on None.
    root = soup.find(id='results-contents')
    tab = root.find(id='phrsListTab') if root is not None else None
    trans = tab.find(class_='trans-container') if tab is not None else None
    if trans is None:
        return ''

    # get_text() always returns a str, whereas .string is None for an <li>
    # with several children and would make join() fail.
    return '\n'.join(tran.get_text() for tran in trans.find_all('li'))
def addword(root, words, tag="TOEFL"):
    """Append one ``<item>`` entry to the ``<wordbook>`` element of *root*.

    The entry has three children, in this order: ``<word>`` holding the
    word itself, ``<trans>`` holding the looked-up definitions wrapped in a
    CDATA section, and ``<tags>`` holding *tag*.

    Args:
        root: BeautifulSoup document that contains a ``<wordbook>`` element.
        words: the single word to add (the name is plural for historical
            reasons; one word is expected per call).
        tag: text stored in the entry's ``<tags>`` element.
    """
    # Look the word up first so a failed query leaves the document untouched.
    children = (
        ("word", words),
        ("trans", CData(query(words))),
        ("tags", tag),
    )
    item = root.new_tag("item")
    for name, text in children:
        child = root.new_tag(name)
        child.string = text
        item.append(child)
    root.wordbook.append(item)
# Script entry point: argv[1] is a text file with one word per line,
# argv[2] is the path of the XML word book to write.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        # Fail with a usage hint instead of a bare IndexError.
        sys.exit("usage: %s WORDLIST_FILE OUTPUT_XML" % sys.argv[0])

    root = BeautifulSoup("<wordbook></wordbook>", "xml")
    for line in readfile(sys.argv[1]):
        word = line.strip()
        if not word:
            # Blank lines would otherwise trigger a lookup of the empty string.
            continue
        addword(root, word)

    # The ``with`` block closes the file; no explicit close() is needed.
    with open(sys.argv[2], 'w', encoding='utf-8') as f:
        f.write(str(root.wordbook))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment