Created
May 19, 2019 04:21
-
-
Save timtan/3fc38d0b92a0e39031a0f03415ab6c0b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#coding: utf-8 | |
import requests | |
import pandas as pd | |
import numpy as np | |
from bs4 import BeautifulSoup as bs | |
def financial_statement(year, season, type='綜合損益彙總表'):
    """Download one quarterly summary report from the TWSE MOPS site.

    Args:
        year: Report year. A value of 1000 or more is treated as a
            Gregorian year and reduced by 1911 before being sent; smaller
            values are sent unchanged. (Presumably the site expects
            ROC-calendar years -- confirm.)
        season: Quarter identifier; it is sent to the server as a string.
        type: Name of the report to fetch. One of '綜合損益彙總表',
            '資產負債彙總表' or '營益分析彙總表'.

    Returns:
        A single DataFrame made by concatenating every table parsed from
        the response, excluding the first ``<table>`` element.

    Raises:
        ValueError: If ``type`` is not one of the supported report names,
            or if the response contains no tables to parse.
        requests.HTTPError: If the server answers with an error status.
    """
    # Local import keeps this block self-contained; the file's top-level
    # imports do not include ``io``.
    from io import BytesIO

    endpoints = {
        '綜合損益彙總表': 'http://mops.twse.com.tw/mops/web/ajax_t163sb04',
        '資產負債彙總表': 'http://mops.twse.com.tw/mops/web/ajax_t163sb05',
        '營益分析彙總表': 'http://mops.twse.com.tw/mops/web/ajax_t163sb06',
    }
    url = endpoints.get(type)
    if url is None:
        # Fail before touching the network instead of printing a message
        # and then crashing on an unbound ``url``.
        raise ValueError(
            'type does not match: {!r}; expected one of {}'.format(
                type, ', '.join(endpoints)
            )
        )

    if year >= 1000:
        year -= 1911

    payload = {
        'encodeURIComponent': 1,
        'step': 1,
        'firstin': 1,
        'off': 1,
        'TYPEK': 'sii',
        'year': str(year),
        'season': str(season),
    }
    # A timeout prevents the call from hanging forever on a stalled server.
    r = requests.post(url, payload, timeout=30)
    r.raise_for_status()
    r.encoding = 'utf8'

    soup = bs(r.text, 'lxml')
    all_df = []
    # The first <table> is skipped, as in the original code (presumably it
    # is not a data table -- confirm against a live response).
    for tb in soup.select('table')[1:]:
        # pandas deprecated passing literal HTML to read_html; wrap the
        # UTF-8 bytes in a file-like object instead.
        html = BytesIO(tb.prettify('utf-8'))
        dfs = pd.read_html(html, encoding='utf-8')
        all_df.append(pd.concat(dfs))

    if not all_df:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            'no tables found in response for year={}, season={}, type={!r}'
            .format(year, season, type)
        )
    return pd.concat(all_df)
# Fetch the default report for the first quarter of 2019 and save it as a
# spreadsheet in the current working directory.
df = financial_statement(year=2019, season=1)
# NOTE(review): writing the legacy ".xls" format may need an engine that
# newer pandas releases no longer ship -- confirm against the installed
# pandas version.
df.to_excel("test.xls")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment