# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Script: open Chrome through a local ChromeDriver, load Google Taiwan,
# and print the URL the browser ends up on.
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

# Launch Chrome through the browser driver.  The driver location is handed
# over via a Service object because recent Selenium 4 releases no longer
# accept the path as the first positional argument of webdriver.Chrome().
driver = webdriver.Chrome(
    service=Service(executable_path=r"C:\Clarissa\文章\程式碼\Selenium\chromedriver")
)

# Navigate to a specific URL.
default_url = "https://www.google.com.tw"
driver.get(default_url)

# Get the URL of the current page.  Printed explicitly: a bare expression
# only shows a value in an interactive session and does nothing in a script.
print(driver.current_url)
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Script fragment: imports and initial state for collecting media links
# from the page currently loaded in the browser.
import time

from bs4 import BeautifulSoup as Soup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

media_url = []
count = 0  # number of image links obtained so far

# NOTE(review): `browser` is not defined anywhere in this snippet; it is
# presumably a WebDriver instance created earlier — confirm before running.
# Parse the browser's current page source with the lxml parser.
soup = Soup(browser.page_source, "lxml")
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Script: load one Instagram post in Chrome and print the value of the
# `datetime` attribute found inside the post's <article> element.
from selenium import webdriver
from bs4 import BeautifulSoup as Soup

browser = webdriver.Chrome()
url = 'https://www.instagram.com/p/CEYqVrtp1HA/'
browser.get(url)

# Parse the page source as it is right after navigation.
soup = Soup(browser.page_source, "lxml")

# First <article> tag in the document; BeautifulSoup yields None when the
# page has none, so fail with a clear message rather than an AttributeError.
arti = soup.article
if arti is None:
    raise RuntimeError(f"no <article> element found in the page source of {url}")

# Element carrying this exact class string; its `datetime` attribute is read.
time_tag = arti.find(class_="_1o9PC Nzb55")
if time_tag is None:
    raise RuntimeError(
        'no element with class "_1o9PC Nzb55" found inside <article>'
    )

post_time = time_tag.get('datetime')
print(post_time)