Skip to content

Instantly share code, notes, and snippets.

@clarissarjtai
Created January 29, 2021 08:05
Show Gist options
  • Save clarissarjtai/8620dac3fffa678ae6834058040711b3 to your computer and use it in GitHub Desktop.
Save clarissarjtai/8620dac3fffa678ae6834058040711b3 to your computer and use it in GitHub Desktop.
google map review
import os
import re
import time
import pandas as pd
from datetime import datetime
from selenium import webdriver
from bs4 import BeautifulSoup as Soup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
# `browser` is your Selenium WebDriver instance. It is not created in this
# snippet; presumably it has already been pointed at the reviews panel of a
# Google Maps place before the code below runs.
##### your own set-up code goes here #####
# When a review carries more than four photos the extras are collapsed, so
# click every photo element to expand them; only then are they loaded into the
# page and available for scraping.
# `find_elements(By.CLASS_NAME, ...)` is used because the older
# `find_elements_by_*` helpers were deprecated and later removed in Selenium 4.
all_photo = browser.find_elements(By.CLASS_NAME, 'section-review-photo')
for ap in all_photo:
    ap.click()
# Parse the current page source.
soup = Soup(browser.page_source, "lxml")
# Collect the container element of every review.
# NOTE(review): passing a multi-class string to `class_` only matches elements
# whose class attribute is exactly this string, in this order.
all_reviews = soup.find_all(class_='section-review ripple-container GLOBAL__gm2-body-2')
if not all_reviews:
    # Fail with a clear message instead of a bare "list index out of range".
    raise IndexError("no review elements found in the page source")
ar = all_reviews[0]  # index of the review to inspect
# Reviewer name (the original computed this value but never stored it).
name_review = ar.find(class_="section-review-title").text
# Reviewer subtitle (reviewer badge & number of reviews).
subtitle_review = ar.find(class_="section-review-subtitle").text
# Star rating. The aria-label looks like " 5 顆星 "; pull out the number itself
# rather than using str.strip("顆星"), which strips a *set of characters* and
# leaves the separating space behind ("5 ").
star_label = ar.find(class_="section-review-stars").get('aria-label').strip()
star_match = re.search(r'\d+(?:\.\d+)?', star_label)
star_review = star_match.group() if star_match else star_label.strip("顆星").strip()
# Review publish date (text as shown on the page).
date_review = ar.find(class_="section-review-publish-date").text
# Review body text.
text_review = ar.find(class_="section-review-text").text
# Review photos: each photo element stores its image as a CSS
# `background-image:url(...)` declaration inside the `style` attribute.
photos = ar.find_all(class_="section-review-photo")
photo_urls = []
for ph in photos:
    # str.strip('background-image:url(') removes any of those *characters*
    # from both ends, so it can eat into the URL and leaves quotes or a
    # trailing ';' in place. Match the url(...) token explicitly instead.
    style = ph.get('style') or ''
    url_match = re.search(r'url\(\s*["\']?(.*?)["\']?\s*\)', style)
    if url_match:
        photo_urls.append(url_match.group(1))
# Choose the review sort order by its position in the sort menu.
data_index = 1  # newest first
# Wait (up to 30 s) until at least one sort-menu entry is present in the DOM.
WebDriverWait(browser, 30).until(EC.presence_of_element_located((By.CLASS_NAME, 'action-menu-entry')))
# Collect the sort-menu entries. `find_elements(By.CLASS_NAME, ...)` is used
# because the older `find_elements_by_*` helpers were deprecated and later
# removed in Selenium 4.
category_click = browser.find_elements(By.CLASS_NAME, 'action-menu-entry')
if data_index >= len(category_click):
    # Fail with a clear message instead of a bare "list index out of range".
    raise IndexError(
        f"sort option {data_index} requested but only "
        f"{len(category_click)} menu entries were found"
    )
print(category_click[data_index].text)
# Click the requested sort order.
category_click[data_index].click()
# Filter reviews by one of the keyword chips, chosen by position.
mentioned_index = 1
# The chip is identified by two classes at once. By.CLASS_NAME is documented
# for a single class name only, so express the compound class as a CSS
# selector instead.
keyword_locator = (By.CSS_SELECTOR, '.tuPVDR7ouq5__text.gm2-body-2')
# Wait (up to 30 s) until at least one keyword chip is present in the DOM.
WebDriverWait(browser, 30).until(EC.presence_of_element_located(keyword_locator))
# Collect the keyword chips.
category_click = browser.find_elements(*keyword_locator)
if mentioned_index >= len(category_click):
    # Fail with a clear message instead of a bare "list index out of range".
    raise IndexError(
        f"keyword {mentioned_index} requested but only "
        f"{len(category_click)} keyword chips were found"
    )
print(category_click[mentioned_index].text)
# Click the requested keyword chip.
category_click[mentioned_index].click()
@MB95530
Copy link

MB95530 commented Nov 3, 2021

請問browser get 的url 是甚麼?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment