Skip to content

Instantly share code, notes, and snippets.

@cquest
Created November 26, 2020 11:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cquest/c8bc69ae03686bce9b96d25b7869aed7 to your computer and use it in GitHub Desktop.
Save cquest/c8bc69ae03686bce9b96d25b7869aed7 to your computer and use it in GitHub Desktop.
#! /bin/python3
import re, json
import requests
from bs4 import BeautifulSoup
SITE = "https://www.acheterenpaysdelorient.fr"

# Seconds to wait for any single HTTP response; requests waits forever
# when no timeout is given.
REQUEST_TIMEOUT = 30

# Relative links to shop pages found in the home page HTML. The quantifier
# is non-greedy so that several anchors on the same line yield one match
# each instead of a single match spanning all of them.
SHOP_LINK_RE = re.compile(r'<a href="(/commerces/.*?)">')

# Map marker position embedded in an inline script. Each value stops at the
# first ',' or '}' so that trailing JavaScript on the same line is not
# captured together with the longitude.
POSITION_RE = re.compile(r'position: \{lat: ([^,}]+), lng: ([^,}]+)\}')


def extract_shop_paths(html):
    """Return the relative ``/commerces/...`` paths linked from *html*.

    Paths are returned in document order; duplicates are kept.
    """
    return [match.group(1) for match in SHOP_LINK_RE.finditer(html)]


def extract_coordinates(script_text):
    """Return ``(lat, lon)`` parsed from a ``position: {lat: .., lng: ..}`` marker.

    Returns ``None`` when *script_text* holds no such marker or when the
    captured values are not valid floats.
    """
    match = POSITION_RE.search(script_text)
    if match is None:
        return None
    try:
        return float(match.group(1)), float(match.group(2))
    except ValueError:
        return None


def _stripped_strings(parent, css_class):
    """Return the stripped strings of the first *css_class* element, or ``[]``."""
    node = parent.find(class_=css_class)
    return [] if node is None else list(node.stripped_strings)


def parse_shop(page, url):
    """Build the JSON-serializable record for one shop page.

    *page* is the parsed BeautifulSoup document and *url* its absolute URL.
    Returns ``None`` when the page lacks the ``MainBody-content`` container
    or the ``Partner-title`` element. The ``horaires`` key is present only
    when the page has a ``Partner-openings`` element, and ``lat``/``lon``
    only when a position marker is found in one of the page's scripts.
    """
    body = page.find(class_="MainBody-content")
    if body is None:
        return None
    title = body.find(class_="Partner-title")
    if title is None:
        return None

    record = {
        "url": url,
        "nom": title.string,
        "categorie": _stripped_strings(body, "Partner-subtitle"),
        "addr": _stripped_strings(body, "Partner-address"),
    }

    openings = body.find(class_="Partner-openings")
    if openings is not None:
        # Store a plain list: a trailing comma here would wrap it in a
        # 1-tuple and serialize as a nested list.
        record["horaires"] = list(openings.stripped_strings)

    scripts = "\n".join(str(tag) for tag in page.find_all("script"))
    coordinates = extract_coordinates(scripts)
    if coordinates is not None:
        record["lat"], record["lon"] = coordinates
    return record


def main():
    """Scrape every shop linked from the home page; print one JSON object per line."""
    import sys  # local import: only needed for the warnings below

    with requests.Session() as session:
        homepage = session.get(SITE, timeout=REQUEST_TIMEOUT)
        # Without the home page there is nothing to iterate over.
        homepage.raise_for_status()

        for path in extract_shop_paths(homepage.text):
            url = SITE + path
            response = session.get(url, timeout=REQUEST_TIMEOUT)
            if not response.ok:
                # One broken shop page should not abort the whole run.
                print(f"skipping {url}: HTTP {response.status_code}",
                      file=sys.stderr)
                continue

            page = BeautifulSoup(response.text, "html.parser")
            record = parse_shop(page, url)
            if record is None:
                print(f"skipping {url}: no shop content found",
                      file=sys.stderr)
                continue
            print(json.dumps(record, ensure_ascii=False))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment