Last active
July 31, 2018 07:53
-
-
Save resad1983/58161a026b061371a93fda833425496d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape the article list of the PTT Food board page by page (newest first,
# following the "previous page" link) and store each entry's date, author and
# title in the python_test table of a SQL Server database.
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
import pyodbc

# Placeholders make the driver quote the scraped text, so a title containing
# an apostrophe can neither break the statement nor inject SQL.
INSERT_SQL = "insert into python_test(T_DAY,T_author,T_title) values (?, ?, ?)"

# CSS selectors of the three fields inside one '.r-ent' entry, in column order.
ENTRY_FIELDS = ('div.date', 'div.author', 'div.title')


def _entry_values(entry):
    """Return the (date, author, title) text of one '.r-ent' list entry.

    The text is returned exactly as it appears in the page (no stripping),
    so stored values keep the same shape as rows inserted by earlier runs.
    """
    return tuple(entry.select(selector)[0].text for selector in ENTRY_FIELDS)


def _store_page(cnxn, cursor, html):
    """Insert every list entry found in ``html`` as one transaction.

    The page is committed as a whole; if any insert or the commit raises a
    database error, the whole page is rolled back and the error is reported
    so the caller can carry on with the next page.
    """
    soup = BeautifulSoup(html, 'html.parser')
    try:
        for each_item in soup.select('.r-ent'):
            cursor.execute(INSERT_SQL, *_entry_values(each_item))
        cnxn.commit()
    except pyodbc.Error as err:
        cnxn.rollback()
        print('insert failed, page rolled back:', err)


# Ask first: an invalid number then fails before a browser or a database
# connection has been opened.
num_page = int(input("取的頁數 : "))

# NOTE(review): the credentials are hard-coded in the connection string;
# consider reading them from the environment or a config file.
cnxn = pyodbc.connect('DRIVER={SQL Server}; SERVER=localhost;PORT=1433;DATABASE=BSMS_FLOW;UID=bsms;PWD=bsms')
try:
    cursor = cnxn.cursor()
    chrome_path = 'C:\\Python37\\chromedriver.exe'
    web = webdriver.Chrome(chrome_path)
    try:
        web.get("https://www.ptt.cc/bbs/Food/index.html")
        while num_page > 0:
            print('========新頁面=========')
            # Reload the current URL before reading the source, as the
            # original script did, so page_source is read after a completed
            # get(). NOTE(review): probably redundant after click(); confirm
            # before removing.
            c_url = web.current_url
            web.get(c_url)
            _store_page(cnxn, cursor, web.page_source)
            try:
                web.find_element_by_link_text('‹ 上頁').click()
            except NoSuchElementException:
                # No "previous page" link on this page: nothing left to visit.
                break
            num_page -= 1
    finally:
        # quit() ends the whole driver session (browser and chromedriver
        # process); close() would only close the current window.
        web.quit()
finally:
    cnxn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment