Skip to content

Instantly share code, notes, and snippets.

@lobstrio
Created August 3, 2021 17:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save lobstrio/ee81093ce2289a9bc75b23cd5d98bcbd to your computer and use it in GitHub Desktop.
Save lobstrio/ee81093ce2289a9bc75b23cd5d98bcbd to your computer and use it in GitHub Desktop.
Collect all data from a Search URL on Google Maps 👋
# -*- coding: utf-8 -*-
# Copyright(C) 2021 lobstr
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import csv
from lxml import html
class CrawlerGoogleMapsSelenium:
    """Scrape the places listed on a Google Maps search page into a TSV file.

    The crawler drives a Chrome browser through Selenium: it opens a search
    URL, collects the link of every result shown in the results pane, visits
    each place page and extracts a fixed set of fields.

    The XPath selectors match French UI labels ("J'accepte", "Prix:",
    "Adresse: ", "Numéro de téléphone:", "Site Web:").

    The class never closes the browser itself; callers should invoke
    ``self.driver.quit()`` once they are done.
    """

    # Default location of the chromedriver binary.
    DEFAULT_CHROMEDRIVER_PATH = "/Users/sashabouloudnine/Downloads/chromedriver"

    # File written by main(), in the current working directory.
    OUTPUT_FILENAME = 'googlemaps_20210803.csv'

    # Column order of the exported file; matches the keys built by get_etab().
    FIELDNAMES = ['nom', 'categorie', 'reviews', 'score', 'prix', 'adresse', 'telephone', 'website']

    def __init__(self, chromedriver_path=None):
        """Start a Chrome session.

        Args:
            chromedriver_path: path to the chromedriver executable. Defaults
                to ``DEFAULT_CHROMEDRIVER_PATH`` when omitted.
        """
        if chromedriver_path is None:
            chromedriver_path = self.DEFAULT_CHROMEDRIVER_PATH
        self.driver = webdriver.Chrome(chromedriver_path)

    def _text_or_empty(self, xpath):
        """Return the text of the first element matching *xpath*, or ''."""
        try:
            return self.driver.find_element(By.XPATH, xpath).text
        except NoSuchElementException:
            return ''

    def _attr_or_empty(self, xpath, attribute):
        """Return *attribute* of the first element matching *xpath*, or ''."""
        try:
            value = self.driver.find_element(By.XPATH, xpath).get_attribute(attribute)
        except NoSuchElementException:
            return ''
        # get_attribute() may return None; normalise so callers can use str methods.
        return value or ''

    def accept_cookies(self):
        """Click the "J'accepte" consent button if it is present.

        Does nothing when the button is not on the page.
        """
        try:
            accept_button = self.driver.find_element(By.XPATH, "//span[contains(text(), \"J\'accepte\")]")
        except NoSuchElementException:
            # No consent banner on this page: nothing to dismiss.
            return
        self.driver.execute_script("arguments[0].scrollIntoView();", accept_button)
        time.sleep(2)
        accept_button.click()
        time.sleep(2)

    def iter_etabs(self, starting_url):
        """Yield one result dict per place listed on the search page.

        Args:
            starting_url: Google Maps search URL to crawl.

        Yields:
            The dict returned by get_etab() for each result link. Each dict is
            also printed as a tab-separated line.

        Raises:
            ValueError: if *starting_url* is empty (raised on first iteration,
                since this is a generator).
        """
        if not starting_url:
            raise ValueError('starting_url must be a non-empty URL')
        self.driver.get(starting_url)
        time.sleep(2)
        self.accept_cookies()
        WebDriverWait(self.driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//div[contains(@jsaction, 'mouseover:pane')]")))
        time.sleep(2)
        anchors = self.driver.find_elements(By.XPATH, "//div[contains(@jsaction, 'mouseover:pane')]/a")
        # Read every href up front: get_etab() navigates this same driver away
        # from the results page. Anchors without an href are skipped.
        hrefs = [anchor.get_attribute('href') for anchor in anchors]
        for href in hrefs:
            if not href:
                continue
            result_dict = self.get_etab(href)
            print('\t'.join([str(v) for v in result_dict.values()]))
            yield result_dict

    def get_etab(self, url):
        """Open a place page and extract its fields.

        Args:
            url: URL of the place page.

        Returns:
            A dict with the keys listed in ``FIELDNAMES``. A field whose
            element is not found on the page is set to ''.

        Raises:
            ValueError: if *url* is empty.
        """
        if not url:
            raise ValueError('url must be a non-empty URL')
        print(url)
        self.driver.get(url)
        WebDriverWait(self.driver, 20).until(EC.visibility_of_element_located((By.XPATH, "//span[@jstcache=127]")))
        time.sleep(5)

        categorie = self._text_or_empty('//button[@jsaction="pane.rating.category"]')
        reviews = self._text_or_empty('//button[@jsaction="pane.rating.moreReviews"]')
        prix = self._text_or_empty('//span[contains(@aria-label, "Prix:")]')

        # The remaining fields are read from aria-label attributes.
        nom = self._attr_or_empty('//div[@role="main" and @aria-label]', 'aria-label')
        # str.replace returns a new string; the result must be kept.
        score = self._attr_or_empty('//ol[@class="section-star-array"]', 'aria-label').replace('\xa0', ' ')
        adresse = self._attr_or_empty('//button[@data-item-id="address"]', 'aria-label').replace('Adresse: ', '')
        telephone = self._attr_or_empty('//button[contains(@aria-label, "Numéro de téléphone:")]', 'aria-label').replace('Numéro de téléphone: ', '')
        website = self._attr_or_empty('//button[contains(@aria-label, "Site Web:")]', 'aria-label').replace('Site Web: ', '')

        return {
            'nom': nom,
            'categorie': categorie,
            'reviews': reviews,
            'score': score,
            'prix': prix,
            'adresse': adresse,
            'telephone': telephone,
            'website': website,
        }

    def main(self, url):
        """Crawl *url* and write every result to ``OUTPUT_FILENAME``.

        The file is tab-separated with a header row. All results are
        collected before the file is opened, so a crawl that fails part-way
        does not truncate an existing file.

        Args:
            url: Google Maps search URL to crawl.
        """
        etabs = list(self.iter_etabs(url))
        # newline='' lets the csv module control line endings; utf-8 keeps
        # accented characters writable whatever the platform default is.
        with open(self.OUTPUT_FILENAME, mode='w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, delimiter='\t', fieldnames=self.FIELDNAMES)
            writer.writeheader()
            writer.writerows(etabs)
# Script entry point: crawl a fixed Google Maps search URL.
if __name__ == '__main__':
    starting_url = 'https://www.google.com/maps/search/restaurant+marseille/@43.2850096,5.3752173,14z'
    google_maps_crawler = CrawlerGoogleMapsSelenium()
    try:
        google_maps_crawler.main(starting_url)
    finally:
        # Shut the browser down even if the crawl fails, so no Chrome or
        # chromedriver process is left running.
        google_maps_crawler.driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment