Skip to content

Instantly share code, notes, and snippets.

@gpollo
Created February 25, 2020 17:11
Show Gist options
  • Save gpollo/4f3b743ebe1a9238422b74b81b8e3df5 to your computer and use it in GitHub Desktop.
Save gpollo/4f3b743ebe1a9238422b74b81b8e3df5 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import mechanize
import bs4
import re
import random
# Page that is scraped for recipe links.
link_recettes = "https://www.lacuisinedejeanphilippe.com/recettes/"

# An absolute http(s) URL whose path goes through ".com/recipe/".
# Raw string so that "\." is a regex escape, not a (invalid) string escape.
recette_regex = re.compile(r'^http.*\.com/recipe/.*$')


def select_recipe_links(hrefs):
    """Return the set of hrefs that look like recipe URLs.

    Args:
        hrefs: iterable of href strings.

    Returns:
        A set containing every href matched by ``recette_regex``;
        duplicates collapse and non-matching hrefs are dropped.
    """
    return {href for href in hrefs if recette_regex.match(href) is not None}


def fetch_recipe_links(url=link_recettes):
    """Download ``url`` and return the set of recipe URLs it links to.

    Only anchors that carry an ``href`` and sit inside a ``<div>`` whose
    id is ``grid-2`` are considered (presumably the recipe grid of the
    page -- verify against the live markup if the site changes).
    """
    browser = mechanize.Browser()
    browser.set_handle_robots(False)
    browser.set_handle_redirect(True)
    response = browser.open(url)
    soup = bs4.BeautifulSoup(response.read(), "lxml")

    hrefs = []
    for grid in soup.find_all('div', id="grid-2"):
        # href=True skips anchors that have no href attribute at all.
        hrefs.extend(a['href'] for a in grid.find_all('a', href=True))
    return select_recipe_links(hrefs)


def main():
    """Print one recipe URL picked uniformly at random."""
    recettes = list(fetch_recipe_links())
    if not recettes:
        # random.choice would raise an opaque IndexError on an empty list;
        # exit with an explicit message (printed to stderr) instead.
        raise SystemExit("no recipe link found at " + link_recettes)
    print(random.choice(recettes))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment