Created
July 26, 2020 05:42
-
-
Save arthur-tomsjj/1874d406c8e29e24c4c496c1c33a8011 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
# Download every character ("unit") image shown on the Princess Connect wiki
# gacha page into the local folder '爬蟲img/' as 1.png, 2.png, ...
#
# urllib.request can only fetch static pages, so a Selenium-driven browser is
# used to load this dynamically rendered page.
# Raw string: the Windows path contains backslashes that must not be treated
# as escape sequences.
driver = webdriver.Chrome(r'D:\PythonLab\chromedriver_win32/chromedriver.exe')
try:
    driver.get("https://pcredivewiki.tw/Gacha")
    time.sleep(5)  # wait for the dynamic page to finish loading its full source
    html = driver.page_source  # capture the rendered page source
finally:
    # Always shut the browser down, even if loading the page failed.
    driver.quit()

url = "https://pcredivewiki.tw"
page = BeautifulSoup(html, 'html.parser')
All_img = page.find_all('img')

# The images are written into this folder; create it on first run.
os.makedirs('爬蟲img', exist_ok=True)

index = 0
for img in All_img:
    raw_src = img.get('src')
    if not raw_src:
        # <img> tags without a src attribute have nothing to download.
        continue
    # urljoin handles site-relative, page-relative and absolute src values.
    src = urljoin(url, raw_src)
    if 'unit' not in src:
        continue
    index += 1
    fname = '爬蟲img/' + str(index) + '.png'
    try:
        data = requests.get(src, timeout=30)
        data.raise_for_status()
    except requests.RequestException as err:
        # Best effort: report the failed image and carry on with the rest
        # instead of saving an error page as a .png or aborting the run.
        print('下載失敗 : ' + src + ' (' + str(err) + ')')
        continue
    with open(fname, 'wb') as f:
        f.write(data.content)
    print('抓取公主連結 : ' + src.split('/')[-1])
print('公主連結-爬蟲結束...')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment