Skip to content

Instantly share code, notes, and snippets.

@mikeyee
Last active November 12, 2019 13:17
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save mikeyee/a6588d0209076c33bf699c62006a192a to your computer and use it in GitHub Desktop.
Save mikeyee/a6588d0209076c33bf699c62006a192a to your computer and use it in GitHub Desktop.
#載入程式庫
import re
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Stock code to track.
stock = '01810'
# Open the HKEX advanced announcement-search page in Firefox.
driver = webdriver.Firefox()
try:
    driver.get("http://www.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main_c.aspx")
    if "HKEXnews" not in driver.title:
        # Raised explicitly instead of using ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError("unexpected page title: %r" % (driver.title,))
    # Type the stock code into its input field, pick "past twelve months" as
    # the search date range, then submit the form.
    elem = driver.find_element(By.NAME, "ctl00$txt_stock_code")
    elem.clear()
    elem.send_keys(stock)
    driver.find_element(
        By.XPATH,
        "//select[@name='ctl00$sel_defaultDateRange']/option[text()='過去十二個月']",
    ).click()
    elem.submit()
    # Wait a few seconds so the result page has finished loading before its
    # content is read.
    time.sleep(4)
    page = driver.page_source
    # Checked on the snapshot taken after the wait, so the test runs against
    # the loaded result page rather than whatever was rendered at submit time.
    if "No results found." in page:
        raise AssertionError("no announcements found for stock %r" % (stock,))
finally:
    # Always end the browser session, even when a step above fails;
    # otherwise the Firefox window and its driver process are left running.
    driver.quit()
# Parse the captured page with BeautifulSoup and extract the useful fields.
soup = BeautifulSoup(page, 'html.parser')

# Elements are selected by id: every id of interest starts with
# "ctl00_gvMain_ctl" and ends with a suffix naming the field it holds.
date_spans = soup.find_all('span', id=re.compile(r'^ctl00_gvMain_ctl.*DateTime$'))
category_spans = soup.find_all('span', id=re.compile(r'^ctl00_gvMain_ctl.*ShortText$'))
# The same anchors provide both the title text and the document link, so the
# page is searched for them only once.
title_anchors = soup.find_all('a', id=re.compile(r'^ctl00_gvMain_ctl.*hlTitle$'))

date_tags = [span.get_text() for span in date_spans]
title_tags = [span.get_text() for span in category_spans]
title2_tags = [anchor.get_text() for anchor in title_anchors]

# Insert a space before the last five characters of each date string.
# NOTE(review): presumably this separates a date from an "HH:MM" time that the
# page renders without a separator -- confirm against the live markup.
date1_tags = [text[:-5] + " " + text[-5:] for text in date_tags]

# Turn each href into a full URL on the HKEXnews host. urljoin yields the same
# result as plain concatenation for root-relative hrefs ("/path/..."), and
# additionally leaves already-absolute links intact.
link_tags = [urljoin('http://www.hkexnews.hk', anchor['href']) for anchor in title_anchors]
# Gather every extracted list into a single DataFrame, one column per field:
# date, announcement category, title, and link (in that order).
column_names = ("日期", "通告類別", "標題", "連結")
column_values = (date1_tags, title_tags, title2_tags, link_tags)
announce_table = pd.DataFrame(dict(zip(column_names, column_values)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment