"""Look up a JIS standard on kikakurui.com and open its page in a browser.

Run with two arguments (division letter and number); when fewer are given
the script prompts for both.  Written to run on Python 2 and Python 3.
"""
import re
import sys
import webbrowser as wb

try:  # Python 2 module names, as originally used by this script
    import urllib2 as ul2
    import urlparse as up
except ImportError:  # Python 3 equivalents, bound to the same aliases
    import urllib.request as ul2
    import urllib.parse as up

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3

jissite = "http://kikakurui.com/"


def _fetch_lines(url):
    """Download *url* and return its body as a list of text lines."""
    response = ul2.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the connection, even if read() fails.
        response.close()
    # The pages are treated as UTF-8; undecodable bytes are replaced so one
    # bad byte cannot abort the whole lookup.
    return raw.decode('utf-8', 'replace').split('\n')


# Fetched once at import time: the lines of the site's front page and the
# subset of them that contain a link ending in "index.html".
home = _fetch_lines(jissite)
pat = re.compile(r'<a href=".*index\.html">')
linktojis = [line for line in home if pat.search(line) is not None]

# Cache of already downloaded range pages: absolute URL -> list of lines.
linkdict = {}


def _title_after(lines, index):
    """Return the text of the first tag on the line after *index*, or ''."""
    try:
        return lines[index + 1].split('>')[1].split('<')[0]
    except IndexError:
        # No following line, or it does not contain a tag to read from.
        return ''


def jis(cha, num):
    """Search jissite for the page of 'JIS cha num'.

    cha -- division letter of the standard (case-insensitive).
    num -- number of the standard, as an int.

    Prints the standard's title and returns the absolute URL of its page.
    Prints a "Cannot find pages" message and returns None when either the
    range index or the standard itself cannot be located.
    """
    cha = cha.upper()
    not_found = "Cannot find pages for JIS {} {}".format(cha, num)

    # Front-page links are labelled "<cha> NNNN-NNNN</a>"; pick the one whose
    # range contains num.  cha is escaped so odd input cannot break the regex.
    rangepat = re.compile(re.escape(cha) + r' (\d{4})-(\d{4})</a>')
    path = None
    for line in linktojis:
        found = rangepat.search(line)
        if found is None:
            continue
        low, high = found.groups()
        if int(low) <= num <= int(high):
            # The link target is taken from the second quoted value on the
            # line (presumably the href -- verify against the site markup).
            quoted = line.split('"')
            if len(quoted) > 3:
                path = quoted[3]
                break
    if path is None:
        print(not_found)
        return None

    url1 = up.urljoin(jissite, path)
    nextpage = linkdict.get(url1)
    if nextpage is None:
        nextpage = _fetch_lines(url1)
        linkdict[url1] = nextpage

    # NOTE(review): this is a plain substring test, so e.g. "B12" also
    # matches a line containing "B123"; the first matching line wins.
    key = '{}{}'.format(cha, num)
    for i, line in enumerate(nextpage):
        if key not in line:
            continue
        quoted = line.split('"')
        if len(quoted) < 2:
            # Mentions the standard but carries no quoted link target.
            continue
        print(u"JIS {} {}: {}".format(cha, num, _title_after(nextpage, i)))
        return up.urljoin(url1, quoted[1])

    # The range page exists but does not list this standard.
    print(not_found)
    return None


def parsepage(url):
    """Split the original html file into pages, deleting garbage.

    Not implemented yet: currently does nothing and returns None.
    """
    pass


def _parse_number(text):
    """Convert *text* to int, exiting with an error message if it is not one."""
    try:
        return int(text)
    except ValueError:
        sys.exit("Number must be an integer, got {!r}".format(text))


if __name__ == '__main__':
    if len(sys.argv) >= 3:
        fig = sys.argv[1]
        num = _parse_number(sys.argv[2])
    else:
        fig = _read_line("Character: ")
        num = _parse_number(_read_line("Number: "))
    target = jis(fig, num)
    if target is None:
        # jis() already reported the failure; do not hand None to the browser.
        sys.exit(1)
    wb.open(target)