Skip to content

Instantly share code, notes, and snippets.

@Andiology
Last active October 15, 2020 01:01
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save Andiology/322bdc14d949612f5ef2a523f860a96a to your computer and use it in GitHub Desktop.
Instagram post urls
from selenium import webdriver
from bs4 import BeautifulSoup as Soup
import time
# Collect the links of the posts shown on an Instagram profile page by
# scrolling the page several times and parsing the rendered HTML.

# Start a Chrome session; every later step drives this `browser` object.
browser = webdriver.Chrome()
url = 'https://www.instagram.com/bbcnews/'
browser.get(url)  # open the profile page

# Scroll down repeatedly and pick up the post links that appear.
n_scroll = 5
post_url = []  # unique post links, in first-seen order
seen_hrefs = set()  # constant-time duplicate check alongside the ordered list
scroll = 'window.scrollTo(0, document.body.scrollHeight);'
for _ in range(n_scroll):
    browser.execute_script(scroll)
    # Wait *before* reading the page so that the posts loaded by this scroll
    # (including the last one) are present in the HTML we parse.
    time.sleep(2)
    html = browser.page_source
    soup = Soup(html, 'lxml')
    # Look at every anchor matched by the post-link selector.
    for elem in soup.select('article div div div div a'):
        href = elem.get('href')
        if not href:
            # Anchor without a usable href: nothing to record.
            continue
        # Only keep links that have not been collected yet.
        if href not in seen_hrefs:
            seen_hrefs.add(href)
            post_url.append(href)

# Report how many post links were collected in total.
# NOTE: the browser session is intentionally left open, as in the original
# script; call browser.quit() once it is no longer needed.
print("總共取得 " + str(len(post_url)) + " 篇貼文連結")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment