Skip to content

Instantly share code, notes, and snippets.

Last active October 15, 2020 01:01
What would you like to do?
Instagram post urls
from selenium import webdriver
from bs4 import BeautifulSoup as Soup
import time
# Collect Instagram post links by scrolling a page in a Selenium-driven
# Chrome session and parsing the rendered HTML with BeautifulSoup.

# Start the Chrome session that the rest of the script drives.
browser = webdriver.Chrome()
# Page to scrape. Left blank here on purpose: fill in the target address
# before running, since the browser cannot navigate to an empty URL.
url = ''
browser.get(url)  # navigate to the page

# Scroll down repeatedly and pick up post links that were not seen before.
n_scroll = 5
post_url = []
# JavaScript that jumps to the bottom of the document; it never changes,
# so it is built once outside the loop.
scroll = 'window.scrollTo(0, document.body.scrollHeight);'
for _ in range(n_scroll):
    browser.execute_script(scroll)
    html = browser.page_source
    soup = Soup(html, 'lxml')
    # Look at every anchor matched by the post-link selector.
    for elem in soup.select('article div div div div a'):
        href = elem.get('href')
        # Ignore anchors without an href; add a link only the first time
        # it is seen so post_url keeps discovery order without duplicates.
        if href and href not in post_url:
            post_url.append(href)
    time.sleep(2)  # wait for the page to load

# Report how many post links were collected in total.
print(f"總共取得 {len(post_url)} 篇貼文連結")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment