Skip to content

Instantly share code, notes, and snippets.

@kadnan
Created June 10, 2018 11:38
Show Gist options
  • Save kadnan/20b0782b38eeb3bcecb6ea167f800d36 to your computer and use it in GitHub Desktop.
Save kadnan/20b0782b38eeb3bcecb6ea167f800d36 to your computer and use it in GitHub Desktop.
Methods to access the recipe listing and extract the raw markup of each recipe.
def fetch_raw(recipe_url):
    """Download a recipe page and return its HTML markup.

    The request is sent with the ``headers`` mapping defined elsewhere in
    this file.

    Args:
        recipe_url: URL of the recipe page to download.

    Returns:
        The response body with leading/trailing whitespace stripped when the
        server answers with HTTP 200; ``None`` when the request raises or
        any other status code comes back.
    """
    html = None
    print('Processing..{}'.format(recipe_url))
    try:
        r = requests.get(recipe_url, headers=headers)
        if r.status_code == 200:
            # Strip only on success: calling .strip() on the None placeholder
            # used to raise AttributeError on every failed download.
            html = r.text.strip()
    except Exception as ex:
        # Best-effort scrape: report the failure and fall through to None.
        print('Exception while accessing raw html')
        print(str(ex))
    # Return after the try statement rather than from ``finally`` so that
    # KeyboardInterrupt / SystemExit are not silently discarded.
    return html
def get_recipes(limit=3, delay=2):
    """Fetch the raw HTML of the first recipes on the salad listing page.

    Loads the allrecipes.com salad listing, selects the recipe links with
    the ``.fixed-recipe-card__h3 a`` CSS selector and downloads each linked
    page through ``fetch_raw``.

    Args:
        limit: Maximum number of recipe links to follow. Defaults to 3;
            ``None`` follows every link found on the page.
        delay: Seconds to sleep before each recipe download. Defaults to 2.

    Returns:
        A list with the HTML of each recipe that was downloaded
        successfully. The list is empty when the listing page does not
        answer with HTTP 200, and holds whatever was collected so far when
        an exception interrupts processing.
    """
    recipes = []
    url = 'https://www.allrecipes.com/recipes/96/salad/'
    print('Accessing list')
    try:
        r = requests.get(url, headers=headers)
        if r.status_code == 200:
            soup = BeautifulSoup(r.text, 'lxml')
            links = soup.select('.fixed-recipe-card__h3 a')
            # Slicing caps the number of downloads without a manual counter.
            for link in links[:limit]:
                # Pause before every download, as the original loop did.
                sleep(delay)
                recipe = fetch_raw(link['href'])
                # Skip failed downloads so the result holds only markup.
                if recipe is not None:
                    recipes.append(recipe)
    except Exception as ex:
        # Best-effort scrape: report the failure and return what we have.
        print('Exception in get_recipes')
        print(str(ex))
    # Return after the try statement rather than from ``finally`` so that
    # KeyboardInterrupt / SystemExit are not silently discarded.
    return recipes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment