Skip to content

Instantly share code, notes, and snippets.

@arthur-tomsjj
Created July 26, 2020 05:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arthur-tomsjj/1874d406c8e29e24c4c496c1c33a8011 to your computer and use it in GitHub Desktop.
Save arthur-tomsjj/1874d406c8e29e24c4c496c1c33a8011 to your computer and use it in GitHub Desktop.
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
# Scrape the unit images shown on the pcredivewiki.tw gacha page.
#
# urllib.request only works on static pages, so a Selenium-driven browser is
# used to load this dynamic page before its source is read.
#
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases expect a Service object instead. Left
# unchanged so the script keeps working with the installed version -- confirm.
# The raw string keeps the exact same path value while avoiding the invalid
# "\P" / "\c" escape sequences of a normal string literal.
driver = webdriver.Chrome(r'D:\PythonLab\chromedriver_win32/chromedriver.exe')
try:
    driver.get("https://pcredivewiki.tw/Gacha")
    time.sleep(5)  # wait for the dynamic page to finish loading its source
    html = driver.page_source  # capture the page source
finally:
    # Always shut the browser down, even when loading the page fails.
    driver.quit()

url = "https://pcredivewiki.tw"
page = BeautifulSoup(html, 'html.parser')
All_img = page.find_all('img')

# open() does not create missing directories, so make sure the target exists.
os.makedirs('爬蟲img', exist_ok=True)

index = 0
for img in All_img:
    raw_src = img.get('src')
    if not raw_src:
        # <img> tags without a src attribute have nothing to download.
        continue

    # urljoin copes with both site-relative and already-absolute sources.
    src = urljoin(url, raw_src)
    if 'unit' not in src:
        continue

    # A timeout keeps one stalled download from hanging the whole run.
    data = requests.get(src, timeout=30)
    if not data.ok:
        # Do not save an HTTP error page under a .png name; report and move on.
        print('下載失敗 (HTTP ' + str(data.status_code) + ')，略過 : ' + src)
        continue

    # Number only the images that were actually saved.
    index += 1
    fname = '爬蟲img/' + str(index) + '.png'
    with open(fname, 'wb') as f:
        f.write(data.content)
    print('抓取公主連結 : ' + src.split('/')[-1])

print('公主連結-爬蟲結束...')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment