Skip to content

Instantly share code, notes, and snippets.

Created January 22, 2017 14:42
Show Gist options
  • Save anonymous/581b16a5d07746cc39cd4e46ad0a950f to your computer and use it in GitHub Desktop.
Save anonymous/581b16a5d07746cc39cd4e46ad0a950f to your computer and use it in GitHub Desktop.
#! /usr/bin/python
# -*- coding: utf-8 -*-
import re
import urllib2
import csv
import pandas as pd
def GetHtmlcode(ID):
    """Fetch the goodinfo.tw stock-detail page for one stock and return its HTML.

    Args:
        ID: Stock identifier as a string (e.g. '2103'). It is appended
            verbatim to the ``STOCK_ID=`` query parameter.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib2.URLError: If the request fails (``HTTPError`` is a subclass).
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    source = 'http://goodinfo.tw/StockInfo/StockDetail.asp?STOCK_ID='
    url = source + ID
    print(url)

    # Browser-like request headers.
    # Accept-Encoding is deliberately not sent: urllib2 does not decompress
    # gzip/deflate bodies, so response.read() would return compressed bytes.
    # NOTE(review): 'Host' names www.goodinfo.tw while the URL targets
    # goodinfo.tw, and the 'Cookie' value looks like a placeholder left by a
    # paste sanitizer rather than a real cookie -- confirm both.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'Big5,utf-8;q=0.7,*;q=0.3',
        'Accept-Language': 'zh-TW,zh;q=0.8,en-US;q=0.6,en;q=0.4,ja;q=0.2',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Cookie': '427 bytes were stripped',
        'Host': 'www.goodinfo.tw',
        'Referer': url,
    }

    # NOTE(review): a non-None data argument (even "") makes urllib2 send a
    # POST instead of a GET; kept as in the original -- confirm it is intended.
    req = urllib2.Request(url, "", headers)
    response = urllib2.urlopen(req)
    try:
        # Read and decode while the connection is open; the finally clause
        # guarantees the socket is released even if read/decode raises.
        result = response.read().decode('utf-8')
    finally:
        response.close()
    return result
def main():
    """Download the page for stock '2103', pick one HTML table and dump it.

    The table at index 41 of the parsed page is printed three times: as
    parsed, after dropping its first two rows, and after promoting the first
    remaining row to column labels. ``DataFrame.info()`` writes its summary
    itself and returns None, so each ``print(...info())`` also emits a
    trailing ``None`` line.
    """
    html = GetHtmlcode('2103')
    tables = pd.read_html(html)

    # Index 41 is hard-coded; it depends on the layout of the fetched page.
    table = pd.DataFrame(tables[41])
    print(table)
    print("\n\n")

    # Keep only the rows from position 2 onward (drop the first two rows).
    table = table[2:]
    print(table)
    print(table.info())
    print("\n\n")

    # Use the first remaining row as the column names. The row itself is
    # not removed, so it still appears as the first data row.
    table.columns = table.iloc[0]
    print(table)
    print("\n\n")
    print(table.info())
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment