Skip to content

Instantly share code, notes, and snippets.

@nobodyzxc
Last active July 4, 2017 11:59
Show Gist options
  • Save nobodyzxc/7a9ce313aee190fc48d0efcc25ed099d to your computer and use it in GitHub Desktop.
Save nobodyzxc/7a9ce313aee190fc48d0efcc25ed099d to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2.7
import urllib2 , io
from HTMLParser import HTMLParser
####################### SETTING ########################
# URL of the page that the __main__ section downloads and feeds to the parser.
# The query string passes Lang=zh-TW to the server.
bank_url = "http://rate.bot.com.tw/xrt?Lang=zh-TW"
####################### PARSER IMPLEMENTATION ###############
class rateParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.getData = False
self.getCurrencyName = True
self.countRate = 0
self.lastTag = ""
def handle_starttag(self , tag , attrs):
self.lastTag = tag
if tag == "td":
self.getData = True;
def handle_data(self , data):
if self.getData == True:
if self.lastTag == 'td' and self.countRate < 4:
if data.strip() != "":
print '%-7s\t' %data.strip() , " " ,
self.countRate += 1
if self.lastTag == 'div' and self.getCurrencyName == True:
if data.strip() != "":
print data.strip() , "\t" ,
self.getCurrencyName = False
def handle_endtag(self , tag):
if tag == 'td' and self.getData == True:
self.getData = False
if tag == 'tr':
if self.getCurrencyName == False:
print ""
self.getCurrencyName = True
self.countRate = 0
####################### MAIN ######################
if __name__ == '__main__':
    # Build the request explicitly so a custom header can be attached.
    req = urllib2.Request(bank_url)
    # Send a browser-like User-Agent (an MSIE string) instead of urllib2's
    # default one.
    req.add_header("User-Agent" , 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
    response = urllib2.urlopen(req)  # download the HTML page
    try:
        web_page = response.read()   # raw bytes of the page
    finally:
        # Always release the connection, even if reading fails.
        response.close()
    # Parsing prints the extracted rows to stdout.
    rateParser().feed(web_page)
#!/usr/bin/env python3
import io , urllib
from urllib.request import urlopen
from html.parser import HTMLParser
####################### SETTING ########################
# URL of the page that the __main__ section downloads and feeds to the parser.
# The query string passes Lang=zh-TW to the server.
bank_url = "http://rate.bot.com.tw/xrt?Lang=zh-TW"
####################### PARSER IMPLEMENTATION ###############
class rateParser(HTMLParser):
    """Print one line per table row: a name followed by the row's first cells.

    The parser only looks at text that appears while a ``<td>`` is open:

    * text whose most recently opened tag is ``<div>`` is printed once per
      row as the row's name;
    * text whose most recently opened tag is ``<td>`` is printed as a value,
      up to ``max_rates`` values per row.

    A newline is printed at ``</tr>`` only for rows in which a name was found.

    Args:
        max_rates: Maximum number of ``<td>`` values printed per row
            (defaults to 4, the previously hard-coded limit).
    """

    def __init__(self, max_rates=4):
        super().__init__()
        self.max_rates = max_rates
        self.getData = False          # True while inside a <td> element
        self.getCurrencyName = True   # True until this row's name is printed
        self.countRate = 0            # values printed so far in this row
        self.lastTag = ""             # name of the most recently opened tag

    def handle_starttag(self, tag, attrs):
        """Remember the opened tag and start collecting text on ``<td>``."""
        self.lastTag = tag
        if tag == "td":
            self.getData = True

    def handle_data(self, data):
        """Print non-blank text found inside a ``<td>`` as a name or a value."""
        if not self.getData:
            return
        text = data.strip()
        if not text:
            return
        if self.lastTag == 'td' and self.countRate < self.max_rates:
            # Left-aligned, padded to 7 characters, then a tab and a space.
            print('{:7}\t '.format(text), end='')
            self.countRate += 1
        if self.lastTag == 'div' and self.getCurrencyName:
            print('{} \t '.format(text), end='')
            self.getCurrencyName = False

    def handle_endtag(self, tag):
        """Stop collecting at ``</td>``; finish the line and reset at ``</tr>``."""
        if tag == 'td':
            self.getData = False
        elif tag == 'tr':
            # Only rows that printed a name get a terminating newline.
            if not self.getCurrencyName:
                print("")
            self.getCurrencyName = True
            self.countRate = 0
####################### MAIN ######################
if __name__ == '__main__':
    # Download the page; the context manager closes the HTTP response even
    # if reading or decoding raises.
    with urlopen(bank_url) as response:
        web_page = response.read().decode('utf-8')
    # Parsing prints the extracted rows to stdout.
    rateParser().feed(web_page)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment