hzqtc/dict-cli.py

## dict-cli.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import getopt
import lxml.html
import re
import subprocess
import sys
import urllib

from clint.textui import puts, indent, colored

class WordPronunciation(object):
    """One pronunciation variant of a word.

    Both fields start out as empty strings and are filled in by whoever
    builds the object.
    """

    def __init__(self):
        # Chained assignment is safe here: both targets get the same
        # immutable empty string.
        self.phonetic_symbol = self.sound_url = ''

class WordDefinition(object):
    """One sense of a word: an explanation plus a list of examples."""

    def __init__(self):
        # The list literal is evaluated on every call, so each instance
        # owns its own examples list.
        self.explaination, self.examples = '', []

class Word(object):
    """A looked-up word: spelling, pronunciations and definitions.

    Attributes (all empty after construction, filled in by the caller):
      spell         -- the word's spelling.
      pronunciation -- dict mapping an accent name to an object exposing
                       ``phonetic_symbol`` and ``sound_url``.
      definition    -- list of objects exposing ``explaination`` and
                       ``examples`` (each example is indexed as [0] and [1]).
    """

    def __init__(self):
        self.spell = ''
        self.pronunciation = {}
        self.definition = []

    def pronounce(self, prefer='american', repeat=3):
        """Play the word's audio.

        prefer -- key of ``self.pronunciation`` to try first.
        repeat -- loop count forwarded to ``play``.

        When the preferred accent is absent or has an empty sound URL,
        every accent that does have a sound URL is played instead.
        """
        if prefer in self.pronunciation and self.pronunciation[prefer].sound_url != '':
            self.play(self.pronunciation[prefer].sound_url, repeat)
            return
        for accent in self.pronunciation:
            sound_url = self.pronunciation[accent].sound_url
            if sound_url != '':
                self.play(sound_url, repeat)

    def play(self, url, repeat):
        """Run the external ``mpg123`` player on ``url``.

        ``repeat`` is passed as the value of mpg123's ``--loop`` option.
        The player's stderr is discarded and its exit status is ignored.
        """
        import os  # local import: only needed for the null-device path

        # The child's stderr needs a handle opened for writing, and the
        # handle is closed once the player returns instead of being leaked.
        with open(os.devnull, 'w') as devnull:
            subprocess.call(['mpg123', '--loop', str(repeat), '-q', url],
                            stderr=devnull)

    def display(self):
        """Print the word, its pronunciations and its definitions.

        Output goes through clint's ``puts``/``indent``/``colored`` helpers.
        Lines that may carry non-ASCII text are built as unicode and UTF-8
        encoded before being handed to ``puts``.
        """
        puts('%s\t[%s]' % (colored.blue('Pronunciation:'), colored.red(self.spell)))
        for k in self.pronunciation:
            entry = self.pronunciation[k]
            with indent(2):
                puts((u'%s %s\t[%s]\n  %s' % (colored.magenta(u'◆'), k.capitalize(),
                    colored.red(entry.phonetic_symbol),
                    colored.yellow(entry.sound_url))).encode('utf-8'))
        puts()
        puts(colored.blue('Definition:'))
        for d in self.definition:
            with indent(2):
                puts((u'%s %s' % (colored.magenta(u'◆'), highlight_word(d.explaination, self.spell))).encode('utf-8'))
                puts()
                for e in d.examples:
                    with indent(2):
                        # e[0] and e[1] are printed on separate lines in
                        # different colours.
                        puts((u'%s\n%s' % (colored.yellow(e[0]), colored.green(e[1]))).encode('utf-8'))
                        puts()

def highlight_word(s, word):
    """Return ``s`` with occurrences of ``word`` wrapped in red colour codes.

    Both the spelling as given and its ``str.capitalize()`` form are
    highlighted. Matching is plain substring replacement, so the word is
    also coloured where it occurs inside a longer word.

    s    -- text to decorate.
    word -- word to highlight; an empty word leaves ``s`` unchanged.
    """
    if not word:
        # str.replace('', x) would insert x between every character.
        return s
    highlighted = s.replace(word, str(colored.red(word)))
    cap_word = word.capitalize()
    if cap_word != word:
        # If the word is already capitalised the first pass handled it; a
        # second pass would wrap the text again inside its own colour codes.
        highlighted = highlighted.replace(cap_word, str(colored.red(cap_word)))
    return highlighted

def strip_mid(s):
    """Collapse every run of whitespace in ``s`` into a single space.

    Runs at the start or end of the string are collapsed too, not removed.
    """
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', s)

def _remove_nodes(doc, xpath_expr):
    """Detach every element matched by ``xpath_expr`` from its parent."""
    for node in doc.xpath(xpath_expr):
        node.getparent().remove(node)


def _parse_pronunciation(node, sound_xpath):
    """Build a WordPronunciation from one pronunciation element.

    The two fields are extracted independently, so a missing phonetic
    symbol does not also discard an available sound URL. A field that
    cannot be found stays ''.
    """
    pron = WordPronunciation()
    try:
        pron.phonetic_symbol = node.xpath('span[@class="fl"]/strong')[1].text or ''
    except IndexError:
        pass  # fewer than two <strong> elements: no phonetic symbol
    try:
        # The URL is taken from between the first pair of single quotes in
        # the onclick attribute.
        pron.sound_url = node.xpath(sound_xpath)[0].split("'")[1]
    except IndexError:
        pass  # no sound link, or no quoted argument in it
    return pron


def parseWord(word_spell):
    """Fetch ``word_spell`` from www.iciba.com and return a populated Word.

    word_spell -- the word to look up; it is URL-quoted before the request.

    Parsing is best-effort: if an expected element is missing, the error
    message is printed and the Word is returned with whatever was collected
    up to that point. Errors raised while fetching the page are not caught
    here.
    """
    url = "http://www.iciba.com/%s" % urllib.quote_plus(word_spell)
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()
    doc = lxml.html.fromstring(html)
    word = Word()

    try:
        word.spell = doc.xpath('//h1[@id="word_name_h1"]/text()')[0]

        pronuns = doc.xpath('//div[@id="dict_main"]/div[@class="dictbar"]//span[@class="eg"]')
        # The first entry is stored as British, the second as American.
        if pronuns:
            word.pronunciation['british'] = _parse_pronunciation(
                pronuns[0], 'a[@class="ico_sound"]/@onclick')
        if len(pronuns) > 1:
            word.pronunciation['american'] = _parse_pronunciation(
                pronuns[1], 'div[@class="vCri"]/a[@class="vCri_laba"]/@onclick')

        # remove troublesome elements before reading the text content
        _remove_nodes(doc, '//div[@class="collins_en_cn"]//span[@class="num"]')
        _remove_nodes(doc, '//div[@class="collins_en_cn"]//div[@class="tips_main"]')
        _remove_nodes(doc, '//div[@class="collins_en_cn"]//span[@class="tips_box"]')

        # start parsing collins
        for block in doc.xpath('//div[@class="collins_en_cn"]'):
            caption = block.xpath('div[@class="caption"]')
            if not caption:
                continue
            word_def = WordDefinition()
            word_def.explaination = strip_mid(caption[0].text_content()).strip()
            for item in block.xpath('ul/li[not(@class)]'):
                texts = [strip_mid(p.text_content()).strip() for p in item.xpath('p')]
                if len(texts) != 2:
                    # Skip an item that is not a two-paragraph pair instead
                    # of aborting every remaining definition.
                    continue
                word_def.examples.append(tuple(texts))
            word.definition.append(word_def)

    except Exception as e:
        # Single-argument print(...) is also valid as a Python 2 print
        # statement and produces the same output.
        print(str(e))

    return word

def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        "Command line English-Chinese Dictionary. Data comes from http://www.iciba.com/.",
        "Usage: dict-cli.py [options] [word]",
        "  -h, --help         Print this information.",
        "  -n, --nopronounce  Don't pronounce [default do].",
        "  -p, --prefer=b|a   Prefer British or American pronunciation [default American].",
        "  -r, --repeat=NUM   Repeat times [default 3, 0 for infinite loop].",
    )
    # Single-argument print(...) is also valid as a Python 2 print statement.
    print("\n".join(help_lines))

if __name__ == '__main__':
    # Script entry point: parse options, look the word up, show it, and
    # optionally play its pronunciation.
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hnp:r:', ['help', 'nopronounce', 'prefer=', 'repeat='])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    pronounce = True
    prefer = 'american'
    repeat = 3
    # Accepted spellings of --prefer mapped to the accent names that are
    # passed on to Word.pronounce.
    accents = {'a': 'american', 'american': 'american',
               'b': 'british', 'british': 'british'}

    for o, a in opts:
        if o in ('-h', '--help'):
            usage()
            sys.exit()
        elif o in ('-n', '--nopronounce'):
            pronounce = False
        elif o in ('-p', '--prefer'):
            # An unrecognised accent is ignored and the current choice kept.
            prefer = accents.get(a, prefer)
        elif o in ('-r', '--repeat'):
            try:
                count = int(a)
            except ValueError:
                # Report a non-numeric count like any other bad option
                # instead of letting the ValueError escape as a traceback.
                print("option %s requires an integer argument, got %r" % (o, a))
                usage()
                sys.exit(2)
            # A negative count is ignored and the current value kept.
            if count >= 0:
                repeat = count

    if not args:
        usage()
        sys.exit(2)

    word = parseWord(args[0])
    word.display()
    if pronounce:
        word.pronounce(prefer, repeat)
	#!/usr/bin/env python
	# -- coding: utf-8 --

	import getopt
	import lxml.html
	import re
	import subprocess
	import sys
	import urllib

	from clint.textui import puts, indent, colored

	class WordPronunciation(object):
	    """One pronunciation variant of a word.

	    Both fields start out as empty strings and are filled in by whoever
	    builds the object.
	    """

	    def __init__(self):
	        self.phonetic_symbol = ''
	        self.sound_url = ''

	class WordDefinition(object):
	    """One sense of a word: an explanation plus a list of examples."""

	    def __init__(self):
	        self.explaination = ''
	        # A new list per instance, so definitions never share examples.
	        self.examples = []

	class Word(object):
	    """A looked-up word: spelling, pronunciations and definitions.

	    Attributes (all empty after construction, filled in by the caller):
	      spell         -- the word's spelling.
	      pronunciation -- dict mapping an accent name to an object exposing
	                       ``phonetic_symbol`` and ``sound_url``.
	      definition    -- list of objects exposing ``explaination`` and
	                       ``examples`` (each example is indexed as [0] and [1]).
	    """

	    def __init__(self):
	        self.spell = ''
	        self.pronunciation = {}
	        self.definition = []

	    def pronounce(self, prefer='american', repeat=3):
	        """Play the word's audio.

	        prefer -- key of ``self.pronunciation`` to try first.
	        repeat -- loop count forwarded to ``play``.

	        When the preferred accent is absent or has an empty sound URL,
	        every accent that does have a sound URL is played instead.
	        """
	        if prefer in self.pronunciation and self.pronunciation[prefer].sound_url != '':
	            self.play(self.pronunciation[prefer].sound_url, repeat)
	        else:
	            for k in self.pronunciation:
	                if self.pronunciation[k].sound_url != '':
	                    self.play(self.pronunciation[k].sound_url, repeat)

	    def play(self, url, repeat):
	        """Run the external ``mpg123`` player on ``url``.

	        ``repeat`` is passed as the value of mpg123's ``--loop`` option.
	        The player's stderr is discarded and its exit status is ignored.
	        """
	        import os  # local import: only needed for the null-device path

	        # The child's stderr needs a handle opened for writing, and the
	        # handle is closed once the player returns instead of being leaked.
	        with open(os.devnull, 'w') as devnull:
	            subprocess.call(['mpg123', '--loop', str(repeat), '-q', url],
	                            stderr=devnull)

	    def display(self):
	        """Print the word, its pronunciations and its definitions.

	        Output goes through clint's ``puts``/``indent``/``colored`` helpers.
	        Lines that may carry non-ASCII text are built as unicode and UTF-8
	        encoded before being handed to ``puts``.
	        """
	        puts('%s\t[%s]' % (colored.blue('Pronunciation:'), colored.red(self.spell)))
	        for k in self.pronunciation:
	            with indent(2):
	                puts((u'%s %s\t[%s]\n  %s' % (colored.magenta(u'◆'), k.capitalize(),
	                    colored.red(self.pronunciation[k].phonetic_symbol),
	                    colored.yellow(self.pronunciation[k].sound_url))).encode('utf-8'))
	        puts()
	        puts(colored.blue('Definition:'))
	        for d in self.definition:
	            with indent(2):
	                puts((u'%s %s' % (colored.magenta(u'◆'), highlight_word(d.explaination, self.spell))).encode('utf-8'))
	                puts()
	                for e in d.examples:
	                    with indent(2):
	                        puts((u'%s\n%s' % (colored.yellow(e[0]), colored.green(e[1]))).encode('utf-8'))
	                        puts()

	def highlight_word(s, word):
	    """Return ``s`` with occurrences of ``word`` wrapped in red colour codes.

	    Both the spelling as given and its ``str.capitalize()`` form are
	    highlighted, using plain substring replacement.

	    s    -- text to decorate.
	    word -- word to highlight; an empty word leaves ``s`` unchanged.
	    """
	    if not word:
	        # str.replace('', x) would insert x between every character.
	        return s
	    highlighted = s.replace(word, str(colored.red(word)))
	    cap_word = word.capitalize()
	    if cap_word != word:
	        # If the word is already capitalised the first pass handled it; a
	        # second pass would wrap the text again inside its own colour codes.
	        highlighted = highlighted.replace(cap_word, str(colored.red(cap_word)))
	    return highlighted

	def strip_mid(s):
	    """Collapse every run of whitespace in ``s`` into a single space.

	    Runs at the start or end of the string are collapsed too, not removed.
	    """
	    return re.sub(r'\s+', ' ', s)

	def _remove_nodes(doc, xpath_expr):
	    """Detach every element matched by ``xpath_expr`` from its parent."""
	    for node in doc.xpath(xpath_expr):
	        node.getparent().remove(node)


	def _parse_pronunciation(node, sound_xpath):
	    """Build a WordPronunciation from one pronunciation element.

	    The two fields are extracted independently, so a missing phonetic
	    symbol does not also discard an available sound URL. A field that
	    cannot be found stays ''.
	    """
	    pron = WordPronunciation()
	    try:
	        pron.phonetic_symbol = node.xpath('span[@class="fl"]/strong')[1].text or ''
	    except IndexError:
	        pass  # fewer than two <strong> elements: no phonetic symbol
	    try:
	        # The URL is taken from between the first pair of single quotes in
	        # the onclick attribute.
	        pron.sound_url = node.xpath(sound_xpath)[0].split("'")[1]
	    except IndexError:
	        pass  # no sound link, or no quoted argument in it
	    return pron


	def parseWord(word_spell):
	    """Fetch ``word_spell`` from www.iciba.com and return a populated Word.

	    word_spell -- the word to look up; it is URL-quoted before the request.

	    Parsing is best-effort: if an expected element is missing, the error
	    message is printed and the Word is returned with whatever was collected
	    up to that point. Errors raised while fetching the page are not caught
	    here.
	    """
	    url = "http://www.iciba.com/%s" % urllib.quote_plus(word_spell)
	    response = urllib.urlopen(url)
	    try:
	        html = response.read()
	    finally:
	        response.close()
	    doc = lxml.html.fromstring(html)
	    word = Word()

	    try:
	        word.spell = doc.xpath('//h1[@id="word_name_h1"]/text()')[0]

	        pronuns = doc.xpath('//div[@id="dict_main"]/div[@class="dictbar"]//span[@class="eg"]')
	        # The first entry is stored as British, the second as American.
	        if pronuns:
	            word.pronunciation['british'] = _parse_pronunciation(
	                pronuns[0], 'a[@class="ico_sound"]/@onclick')
	        if len(pronuns) > 1:
	            word.pronunciation['american'] = _parse_pronunciation(
	                pronuns[1], 'div[@class="vCri"]/a[@class="vCri_laba"]/@onclick')

	        # remove troublesome elements before reading the text content
	        _remove_nodes(doc, '//div[@class="collins_en_cn"]//span[@class="num"]')
	        _remove_nodes(doc, '//div[@class="collins_en_cn"]//div[@class="tips_main"]')
	        _remove_nodes(doc, '//div[@class="collins_en_cn"]//span[@class="tips_box"]')

	        # start parsing collins
	        for block in doc.xpath('//div[@class="collins_en_cn"]'):
	            caption = block.xpath('div[@class="caption"]')
	            if not caption:
	                continue
	            word_def = WordDefinition()
	            word_def.explaination = strip_mid(caption[0].text_content()).strip()
	            for item in block.xpath('ul/li[not(@class)]'):
	                texts = [strip_mid(p.text_content()).strip() for p in item.xpath('p')]
	                if len(texts) != 2:
	                    # Skip an item that is not a two-paragraph pair instead
	                    # of aborting every remaining definition.
	                    continue
	                word_def.examples.append(tuple(texts))
	            word.definition.append(word_def)

	    except Exception as e:
	        # Single-argument print(...) is also valid as a Python 2 print
	        # statement and produces the same output.
	        print(str(e))

	    return word

	def usage():
	    """Print the command-line help text to standard output."""
	    help_lines = (
	        "Command line English-Chinese Dictionary. Data comes from http://www.iciba.com/.",
	        "Usage: dict-cli.py [options] [word]",
	        "  -h, --help         Print this information.",
	        "  -n, --nopronounce  Don't pronounce [default do].",
	        "  -p, --prefer=b|a   Prefer British or American pronunciation [default American].",
	        "  -r, --repeat=NUM   Repeat times [default 3, 0 for infinite loop].",
	    )
	    # Single-argument print(...) is also valid as a Python 2 print statement.
	    print("\n".join(help_lines))

	if __name__ == '__main__':
	    # Script entry point: parse options, look the word up, show it, and
	    # optionally play its pronunciation.
	    try:
	        opts, args = getopt.getopt(sys.argv[1:], 'hnp:r:', ['help', 'nopronounce', 'prefer=', 'repeat='])
	    except getopt.GetoptError as err:
	        print(str(err))
	        usage()
	        sys.exit(2)

	    pronounce = True
	    prefer = 'american'
	    repeat = 3
	    # Accepted spellings of --prefer mapped to the accent names that are
	    # passed on to Word.pronounce.
	    accents = {'a': 'american', 'american': 'american',
	               'b': 'british', 'british': 'british'}

	    for o, a in opts:
	        if o in ('-h', '--help'):
	            usage()
	            sys.exit()
	        elif o in ('-n', '--nopronounce'):
	            pronounce = False
	        elif o in ('-p', '--prefer'):
	            # An unrecognised accent is ignored and the current choice kept.
	            prefer = accents.get(a, prefer)
	        elif o in ('-r', '--repeat'):
	            try:
	                count = int(a)
	            except ValueError:
	                # Report a non-numeric count like any other bad option
	                # instead of letting the ValueError escape as a traceback.
	                print("option %s requires an integer argument, got %r" % (o, a))
	                usage()
	                sys.exit(2)
	            # A negative count is ignored and the current value kept.
	            if count >= 0:
	                repeat = count

	    if not args:
	        usage()
	        sys.exit(2)

	    word = parseWord(args[0])
	    word.display()
	    if pronounce:
	        word.pronounce(prefer, repeat)