This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert each scraped comment-count label in `comment_nums` to an int, in
# place: drop the '則留言' suffix and the ',' thousands separators
# (e.g. '1,234則留言' -> 1234).
for i, raw in enumerate(comment_nums):
    # NOTE(review): the collection step is described as appending 0 for posts
    # without a comment count; if that is a numeric 0 it is already clean.
    if isinstance(raw, int):
        continue
    # partition() returns the whole string when '則' is absent, whereas slicing
    # at find() == -1 would silently drop the last character.
    digits = raw.partition('則')[0].replace(',', '')
    comment_nums[i] = int(digits)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 建立一個空的list | |
comment_nums = [] | |
# Collect the comment count of every post.
# The class name may change over time, so this selector needs periodic re-checking!
for ii in frames: | |
read = ii.find('span',class_="oi732d6d ik7dh3pa d2edcug0 hpfvmrgz qv66sw1b c1et5uql a8c37x1j muag1w35 enqfppq2 jq4qci2q a3bd9o3v knj5qynh m9osqain") | |
# Some posts have no comment count, so find() returns None; append 0 for those.
if(read == None): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
# As explained earlier: parse the rendered page and collect every post container.
# The import above binds the name `BeautifulSoup`, so call it by that name
# (the previous `Soup(...)` is undefined unless an alias exists elsewhere).
soup = BeautifulSoup(driver.page_source, "lxml")
# Each element matching this class string is treated as one post.
frames = soup.find_all(class_='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0')
# Get the comment count of the first post.
# In find(), pass the 'span' tag first and then the class name.
# frames[0] is the first post.
# .text extracts the text content.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert each scraped like-count string in `like` to an int, in place.
for i, raw in enumerate(like):
    # NOTE(review): the collection step is described as appending 0 for posts
    # without a like count; if that is a numeric 0 it is already clean.
    if isinstance(raw, int):
        continue
    # Large counts are abbreviated with '\xa0萬' (x10,000), e.g. '1.2\xa0萬'.
    head, unit, _ = raw.partition('\xa0萬')
    if unit:
        # round() instead of int(): int() truncates, so a float product that
        # lands just below a whole number would come out one too small.
        like[i] = round(float(head) * 10000)
    else:
        # Plain counts only need their ',' thousands separators removed.
        like[i] = int(raw.replace(',', ''))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 建立一個空的list | |
like = [] | |
# Collect the like count of every post.
# The class name may change over time, so this selector needs periodic re-checking!
for ii in frames: | |
thumb = ii.find('span',class_="gpro0wi8 pcp91wgn") | |
# Some posts have no like count, so find() returns None; append 0 for those.
if(thumb == None): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
# As explained earlier: parse the rendered page and collect every post container.
# The import above binds the name `BeautifulSoup`, so call it by that name
# (the previous `Soup(...)` is undefined unless an alias exists elsewhere).
soup = BeautifulSoup(driver.page_source, "lxml")
# Each element matching this class string is treated as one post.
frames = soup.find_all(class_='du4w35lb k4urcfbm l9j0dhe7 sjgh65i0')
# Get the like count of the first post.
# In find(), pass the 'span' tag first and then the class name.
# frames[0] is the first post.
# .text extracts the text content.
NewerOlder