Skip to content

Instantly share code, notes, and snippets.

View chiaoyaaaaa's full-sized avatar

chiaoyaaaaa chiaoyaaaaa

View GitHub Profile
@chiaoyaaaaa
chiaoyaaaaa / fb_number_of_likes_comments_shares.py
Created September 13, 2020 15:59
Scrape the number of likes, comments, and shares for every Facebook post
from selenium import webdriver
from bs4 import BeautifulSoup as Soup
# NOTE(review): the driver construction below is commented out, so `driver`
# must already be bound (e.g. by an earlier session) before `driver.get(url)`
# runs; otherwise that call raises NameError.
#driver = webdriver.Chrome()
# URL of the page to visit
url = 'https://www.facebook.com/bbcnews'
# Navigate the browser to that URL
driver.get(url)
@chiaoyaaaaa
chiaoyaaaaa / share_data_clean.py
Created September 13, 2020 03:41
clean share list
# Convert every entry of `share` to an int in place by dropping the text from
# the '次' share suffix onward and removing ',' thousands separators.
for i, raw in enumerate(share):
    # str() lets entries that are already numeric (for example a 0 placeholder
    # for a post without a share count, or a second run of this snippet) pass
    # through instead of failing on a string method.
    # partition() returns the whole text when '次' is absent; the previous
    # find()/slice pair got -1 in that case and silently dropped the last
    # character of the number.
    digits = str(raw).partition('次')[0]
    share[i] = int(digits.replace(',', ''))
@chiaoyaaaaa
chiaoyaaaaa / catch_every_share.py
Created September 13, 2020 03:37
Scrape the number of shares for every Facebook post
# Create an empty list
share = []
# Collect the share count of every post
# find_all() must be followed by [1] to pick out the share count
# (presumably index 0 is a different counter using the same class - TODO confirm)
# The class name may change over time, so it needs to be re-checked periodically!
# NOTE(review): indexing the find_all() result with [1] raises IndexError when
# fewer than two spans match; it does not produce None.
for ii in frames:
read = ii.find_all('span',class_="oi732d6d ik7dh3pa d2edcug0 hpfvmrgz qv66sw1b c1et5uql a8c37x1j muag1w35 enqfppq2 jq4qci2q a3bd9o3v knj5qynh m9osqain")[1]
# Some posts have no share count, so what gets scraped is None; in that case append 0 directly
@chiaoyaaaaa
chiaoyaaaaa / catch_one_share.py
Created September 13, 2020 03:33
Scrape the number of shares for the first Facebook post
from bs4 import BeautifulSoup
# NOTE(review): the code below calls `Soup`, which the import above does not
# bind; it presumably relies on `from bs4 import BeautifulSoup as Soup` having
# been executed earlier - confirm.
# As explained earlier: parse the current page source with lxml and grab all posts
soup = Soup(driver.page_source, "lxml")
frames = soup.find_all(class_='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0')
# Get the count for the first post (the original comment says "comment count",
# but this snippet is described as the share-count variant - TODO confirm)
# In find_all(), pass the 'span' tag first and then the class name
# frames[0] is the first post
# .text extracts the text content
@chiaoyaaaaa
chiaoyaaaaa / comment_data_clean.py
Last active September 12, 2020 16:38
clean comment_nums list
# Convert every entry of `comment_nums` to an int in place by dropping the
# text from the '則' comment suffix ('則留言') onward and removing ','
# thousands separators.
for i, raw in enumerate(comment_nums):
    # str() lets entries that are already numeric (for example a 0 placeholder
    # for a post without a comment count, or a second run of this snippet)
    # pass through instead of failing on a string method.
    # partition() returns the whole text when '則' is absent; the previous
    # find()/slice pair got -1 in that case and silently dropped the last
    # character of the number.
    digits = str(raw).partition('則')[0]
    comment_nums[i] = int(digits.replace(',', ''))
@chiaoyaaaaa
chiaoyaaaaa / catch_every_comment.py
Created September 12, 2020 13:52
Scrape the number of comments for every Facebook post
# Create an empty list
comment_nums = []
# Collect the comment count of every post
# The class name may change over time, so it needs to be re-checked periodically!
for ii in frames:
read = ii.find('span',class_="oi732d6d ik7dh3pa d2edcug0 hpfvmrgz qv66sw1b c1et5uql a8c37x1j muag1w35 enqfppq2 jq4qci2q a3bd9o3v knj5qynh m9osqain")
# Some posts have no comment count, so what gets scraped is None; in that case append 0 directly
if(read == None):
@chiaoyaaaaa
chiaoyaaaaa / catch_one_comment.py
Created September 12, 2020 13:41
Scrape the number of comments for the first Facebook post
from bs4 import BeautifulSoup
# NOTE(review): the code below calls `Soup`, which the import above does not
# bind; it presumably relies on `from bs4 import BeautifulSoup as Soup` having
# been executed earlier - confirm.
# As explained earlier: parse the current page source with lxml and grab all posts
soup = Soup(driver.page_source, "lxml")
frames = soup.find_all(class_='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0')
# Get the comment count of the first post
# In find(), pass the 'span' tag first and then the class name
# frames[0] is the first post
# .text extracts the text content
@chiaoyaaaaa
chiaoyaaaaa / like_data_clean.py
Created September 12, 2020 06:06
clean like list
# Convert every entry of `like` to an int in place.
for i, raw in enumerate(like):
    # str() lets entries that are already numeric (for example a 0 placeholder
    # for a post without a like count, or a second run of this snippet) pass
    # through instead of failing on a string method.
    # partition() searches for the '\xa0萬' unit once; `unit` is empty when
    # the marker is absent, in which case `number` is the whole text.
    number, unit, _ = str(raw).partition('\xa0萬')
    number = number.replace(',', '')
    if unit:
        # Values written with the '萬' unit are multiples of 10,000,
        # e.g. '1.2\xa0萬' -> 12000. round() is used instead of bare int()
        # truncation so a product that binary floating point places just
        # below the intended integer is not reported one too low.
        like[i] = int(round(float(number) * 10000))
    else:
        # Plain values only need their ',' thousands separators removed.
        like[i] = int(number)
@chiaoyaaaaa
chiaoyaaaaa / catch_every_like.py
Last active September 12, 2020 13:43
Scrape the number of likes for every Facebook post
# Create an empty list
like = []
# Collect the like count of every post
# The class name may change over time, so it needs to be re-checked periodically!
for ii in frames:
thumb = ii.find('span',class_="gpro0wi8 pcp91wgn")
# Some posts have no like count, so what gets scraped is None; in that case append 0 directly
if(thumb == None):
@chiaoyaaaaa
chiaoyaaaaa / catch_one_like.py
Last active September 12, 2020 13:43
Scrape the number of likes for the first Facebook post
from bs4 import BeautifulSoup
# NOTE(review): the code below calls `Soup`, which the import above does not
# bind; it presumably relies on `from bs4 import BeautifulSoup as Soup` having
# been executed earlier - confirm.
# As explained earlier: parse the current page source with lxml and grab all posts
soup = Soup(driver.page_source, "lxml")
frames = soup.find_all(class_='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0')
# Get the like count of the first post
# In find(), pass the 'span' tag first and then the class name
# frames[0] is the first post
# .text extracts the text content