Skip to content

Instantly share code, notes, and snippets.

@ting11222001
Last active October 6, 2020 09:00
Show Gist options
  • Save ting11222001/f1c9d1ecd2fd097ac78a0872a44e42ac to your computer and use it in GitHub Desktop.
Save ting11222001/f1c9d1ecd2fd097ac78a0872a44e42ac to your computer and use it in GitHub Desktop.
import time

# Load the selenium package
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Path to my local chromedriver binary
chrome_driver_path = '/Users/ting11222001/Downloads/chromedriver'
# URL of the first result page
url = 'https://gogakuru.com/english/phrase/genre/180_%E5%88%9D%E7%B4%9A%E3%83%AC%E3%83%99%E3%83%AB.html?layoutPhrase=1&orderPhrase=1&condMovie=0&flow=enSearchGenre&condGenre=180&perPage=50'
# Total number of result pages, set by hand
pages = 187


def create_driver():
    """Start a headless Chrome driver with a 5-second implicit wait.

    Returns:
        The configured ``webdriver.Chrome`` instance. The caller is
        responsible for calling ``quit()`` on it.
    """
    # Run the browser in the background instead of opening a visible window.
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # NOTE(review): `executable_path` is the Selenium 3 calling style; newer
    # Selenium releases expect a Service object instead -- confirm against
    # the installed Selenium version.
    driver = webdriver.Chrome(options=options, executable_path=chrome_driver_path)
    # Implicit wait: element lookups retry for up to 5 seconds.
    driver.implicitly_wait(5)
    return driver


def collect_phrases(driver, total_pages, delay=3):
    """Collect the example sentences from ``total_pages`` consecutive pages.

    The driver must already be showing the first page. After each page
    except the last, the "next page" link is clicked and the function
    sleeps for ``delay`` seconds before reading the following page.

    Args:
        driver: Selenium WebDriver positioned on the first result page.
        total_pages: Number of pages to read, counted from the current one.
        delay: Seconds to pause after clicking "next page".

    Returns:
        A list with the text of every ``<span class="font-en">`` element,
        in page order.
    """
    phrases = []
    for page in range(1, total_pages + 1):
        print('Now is: Page ', page)
        print('Working...')
        # Each example sentence lives in a <span class="font-en"> element.
        spans = driver.find_elements(By.XPATH, "//span[@class='font-en']")
        phrases.extend(span.text for span in spans)
        print('Done!')
        # On the last page there is nothing further to click.
        if page == total_pages:
            break
        # Click the "next page" link, then pause before the next round.
        driver.find_element(By.XPATH, "//span[@class='right']/a").click()
        time.sleep(delay)
    return phrases


def save_phrases(phrases, path='japan.txt'):
    """Append each phrase to ``path`` as one UTF-8 encoded line.

    Args:
        phrases: Iterable of strings to write.
        path: Output file; it is opened in append mode, so existing
            content is kept.
    """
    with open(path, 'a', encoding='utf8') as file:
        file.writelines(phrase + '\n' for phrase in phrases)


def main():
    """Scrape every page, print the collected phrases and save them."""
    driver = create_driver()
    try:
        driver.get(url)
        results = collect_phrases(driver, pages)
    finally:
        # Always shut the browser down, even if scraping fails midway,
        # so no headless Chrome process is left running.
        driver.quit()
    # Print the results
    print('===Final Results===')
    print(results)
    # Write every example sentence to the .txt file
    save_phrases(results)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment