@resad1983
Last active July 31, 2018 07:53
from selenium import webdriver
from bs4 import BeautifulSoup
import pyodbc

# Connect to the local SQL Server database
cnxn = pyodbc.connect('DRIVER={SQL Server}; SERVER=localhost;PORT=1433;DATABASE=BSMS_FLOW;UID=bsms;PWD=bsms')
cursor = cnxn.cursor()

# Launch Chrome and open the PTT Food board index page
chrome_path = 'C:\\Python37\\chromedriver.exe'
web = webdriver.Chrome(chrome_path)
web.get("https://www.ptt.cc/bbs/Food/index.html")

num_page = int(input("Number of pages to fetch: "))
while num_page > 0:
    print('========= new page =========')
    html = web.page_source
    soup = BeautifulSoup(html, 'html.parser')

    # Each .r-ent block holds one post's date, author, and title
    container = soup.select('.r-ent')
    for each_item in container:
        # print('date: ' + each_item.select('div.date')[0].text)
        # print('author: ' + each_item.select('div.author')[0].text)
        # print('title: ' + each_item.select('div.title')[0].text)
        # Parameterized insert so quotes in titles do not break the SQL
        cursor.execute(
            "insert into python_test (T_DAY, T_author, T_title) values (?, ?, ?)",
            each_item.select('div.date')[0].text,
            each_item.select('div.author')[0].text,
            each_item.select('div.title')[0].text)
        try:
            cnxn.commit()
        except pyodbc.Error:
            cnxn.rollback()

    # Click the "previous page" link to load the next-older index page
    web.find_element_by_link_text('‹ 上頁').click()
    num_page = num_page - 1

web.close()
cnxn.close()
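
Note: find_element_by_link_text and passing the driver path directly to webdriver.Chrome are Selenium 3.x calls; both were deprecated and later removed in the Selenium 4 line. A minimal sketch of the equivalent calls under Selenium 4, assuming the same chromedriver path (illustrative only, not part of the original gist):

# Selenium 4 variant (assumption: selenium>=4 is installed)
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

web = webdriver.Chrome(service=Service('C:\\Python37\\chromedriver.exe'))
web.get("https://www.ptt.cc/bbs/Food/index.html")

# find_element_by_link_text() is gone; use find_element(By.LINK_TEXT, ...)
web.find_element(By.LINK_TEXT, '‹ 上頁').click()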