This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scroll down and collect new post links.
n_scroll = 5  # number of scroll-to-bottom passes
post_url = []  # list intended for post links; the code that fills it is not part of this snippet
# JavaScript that jumps to the bottom of the document; loop-invariant, so built once.
scroll = 'window.scrollTo(0, document.body.scrollHeight);'
for _ in range(n_scroll):
    browser.execute_script(scroll)
    # Re-read the page source after every scroll and parse it with the lxml backend.
    html = browser.page_source
    soup = Soup(html, 'lxml')
    # Find all post links.
    # NOTE(review): the extraction code that followed this comment is cut off in
    # this snippet, so `soup` and `post_url` are not used further here.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): `browser` is not created in this snippet; the line below is
# commented out, presumably because a WebDriver session already exists — confirm.
# browser = webdriver.Chrome()
url = 'https://www.instagram.com/bbcnews/'
browser.get(url)  # navigate to that URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Standard library first, then third-party packages.
import time

from bs4 import BeautifulSoup as Soup
from selenium import webdriver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time

from bs4 import BeautifulSoup as Soup
from selenium import webdriver

# NOTE(review): `browser` is not created in this snippet; the line below is
# commented out, presumably because a WebDriver session already exists — confirm.
# browser = webdriver.Chrome()
url = 'https://www.instagram.com/bbcnews/'
browser.get(url)  # navigate to that URL

# Scroll down and collect new post links.
n_scroll = 5  # number of scroll passes; the loop that uses it is not part of this snippet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): this snippet is cut off inside the `try` block (no `except` is
# visible), its indentation has been lost, and the trailing `| |` on the code
# lines looks like page-extraction residue. All three must be repaired before
# it can run; the code lines are left untouched here.
# NOTE(review): `find_element_by_xpath` was deprecated in Selenium 4 and removed
# in 4.3; confirm the installed version or switch to `find_element(By.XPATH, ...)`.
post_url = '/p/CEriQnOMwW9/' | |
find = False | |
# If the post is not among the currently loaded page elements, scroll down to load new posts.
while not find: | |
try: | |
# Locate the matching post link and move the mouse pointer over it.
post_elem = browser.find_element_by_xpath('//a[@href="'+str(post_url)+'"]') | |
action = ActionChains(browser) | |
action.move_to_element(post_elem).perform() | |
# Locate the required page elements.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): `browser` is not created in this snippet; the line below is
# commented out, presumably because a WebDriver session already exists — confirm.
# browser = webdriver.Chrome()
url = 'https://www.instagram.com/bbcnews/'
browser.get(url)  # navigate to that URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Selenium driver entry point plus the helper used for mouse-movement chains.
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First, import the packages needed for this walkthrough.
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

# NOTE(review): `browser` is not created in this snippet; the line below is
# commented out, presumably because a WebDriver session already exists — confirm.
# browser = webdriver.Chrome()
url = 'https://www.instagram.com/bbcnews/'
browser.get(url)  # navigate to that URL

post_url = '/p/CEriQnOMwW9/'  # relative link of a single post
find = False  # starts False; presumably flipped once the post is located — confirm