Skip to content

Instantly share code, notes, and snippets.

@chiaoyaaaaa
Created September 13, 2020 15:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save chiaoyaaaaa/76bd852834aa5df4d5c611426a364541 to your computer and use it in GitHub Desktop.
Save chiaoyaaaaa/76bd852834aa5df4d5c611426a364541 to your computer and use it in GitHub Desktop.
Scrape the number of likes, comments, and shares of every post on a Facebook page
from selenium import webdriver
from bs4 import BeautifulSoup as Soup
# Reuse a `driver` that is already defined (for example by an earlier cell of
# an interactive session); otherwise start Chrome so the script also runs on
# its own instead of failing with NameError at driver.get().
if 'driver' not in globals():
    driver = webdriver.Chrome()
# URL of the page to scrape
url = 'https://www.facebook.com/bbcnews'
# Navigate to that URL
driver.get(url)
# Parse the page source currently held by the browser and collect the
# elements that are treated as individual posts by the code below.
# NOTE(review): the class strings look auto-generated by the site and will
# probably need updating when the page markup changes - confirm before use.
soup = Soup(driver.page_source, "lxml")
frames = soup.find_all(class_='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0')
# Collect the like count of every post
def _like_label_to_int(label):
    """Convert the text of a like counter to an int.

    A label containing the no-break-space + '萬' marker is read as a decimal
    number of ten-thousands; any other label is read as an integer that may
    contain ',' thousands separators.
    """
    cut = label.find('\xa0萬')
    if cut == -1:
        return int(label.replace(',', ''))
    return int(float(label[:cut]) * 10000)


like_spans = [post.find('span', class_="gpro0wi8 pcp91wgn") for post in frames]
# A post without the counter span is treated as having 0 likes
like = [_like_label_to_int('0' if span is None else span.text) for span in like_spans]
# Collect the comment count of every post
_comment_span_class = "oi732d6d ik7dh3pa d2edcug0 hpfvmrgz qv66sw1b c1et5uql a8c37x1j muag1w35 enqfppq2 jq4qci2q a3bd9o3v knj5qynh m9osqain"


def _comment_count(post):
    """Return the comment count shown on *post*, or 0 when none is shown.

    The span class searched here is the same one the share counter uses, so
    the span is chosen by the '則' unit marker in its text rather than by
    position. Taking the first span blindly would pick up the share counter
    on a post that has shares but no comments, and slicing at a missing
    marker (index -1) would silently drop the last character.
    """
    for span in post.find_all('span', class_=_comment_span_class):
        label = span.text
        marker = label.find('則')
        if marker != -1:
            # Digits (with optional ',' separators) precede the marker
            return int(label[:marker].replace(',', ''))
    return 0


comment_nums = [_comment_count(post) for post in frames]
# Collect the share count of every post
_share_span_class = "oi732d6d ik7dh3pa d2edcug0 hpfvmrgz qv66sw1b c1et5uql a8c37x1j muag1w35 enqfppq2 jq4qci2q a3bd9o3v knj5qynh m9osqain"


def _share_count(post):
    """Return the share count shown on *post*, or 0 when none is shown.

    The previous code indexed the second matching span directly, which
    raises IndexError for a post with fewer than two counter spans and made
    its None check unreachable. The span is now chosen by the '次' unit
    marker in its text, so a missing counter simply yields 0.
    """
    for span in post.find_all('span', class_=_share_span_class):
        label = span.text
        marker = label.find('次')
        if marker != -1:
            # Digits (with optional ',' separators) precede the marker
            return int(label[:marker].replace(',', ''))
    return 0


share = [_share_count(post) for post in frames]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment