Skip to content

Instantly share code, notes, and snippets.

@sean-duffy
Created May 22, 2015 23:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sean-duffy/37db93c8b42b67571cc1 to your computer and use it in GitHub Desktop.
Save sean-duffy/37db93c8b42b67571cc1 to your computer and use it in GitHub Desktop.
import re
import requests
from lxml import html
# Root URL of the RSPCA website; get_pet() builds pet detail page URLs from it.
rspca_base = 'https://www.rspca.org.uk'
def get_pet(name, ref, timeout=10):
    """Fetch an RSPCA "find a pet" details page and extract the pet's details.

    Args:
        name: Pet name as it appears in the details URL (e.g. ``'DAKOTA'``).
        ref: Animal reference as it appears in the details URL.
        timeout: Seconds to wait for the HTTP request before giving up;
            passed straight to ``requests.get``.

    Returns:
        A dict with the keys ``name``, ``ref``, ``main_image``, ``type``,
        ``breed``, ``colour``, ``age`` and ``description``. ``main_image`` is
        the ``src`` of the ``largeImage`` element, or ``None`` if the page has
        none. ``type``/``breed``/``colour``/``age`` are ``None`` when the
        corresponding label is not found. ``description`` is always ``None``
        (it is not scraped yet).

    Raises:
        IndexError: If the page has no ``petOverview`` or ``atAGlance`` div.
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
    """
    url = (rspca_base + '/findapet/details/-/Animal/{0}/ref/{1}/rehome/').format(
        name, ref
    )
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    tree = html.fromstring(response.content)

    overview = tree.xpath("//div[@class='petOverview']")[0]

    images = tree.xpath("//img[@id='largeImage']")
    main_image = images[0].get('src') if images else None

    # NOTE: the leading '//' makes this XPath absolute, so it searches the
    # whole document rather than only inside `overview`. Kept as-is because
    # the page structure is not verified here; use './/' to restrict it.
    at_a_glance = overview.xpath("//div[@class='atAGlance']")[0]

    # Each child of the "at a glance" block is scanned for "<Label>: <value>".
    # If a label appears in several children, the last occurrence wins.
    glance = {'Type': None, 'Breed': None, 'Colour': None, 'Age': None}
    labels = tuple(glance)
    for child in at_a_glance:
        text = child.text_content()
        for label in labels:
            match = re.search(label + ': (.*)', text)
            if match is not None:
                glance[label] = match.group(1).strip()

    return {
        "name": name,
        "ref": ref,
        "main_image": main_image,
        # Previously parsed but never returned.
        "type": glance['Type'],
        "breed": glance['Breed'],
        "colour": glance['Colour'],
        "age": glance['Age'],
        "description": None,
    }
# Example call: fetches the details for one pet. This runs whenever the module
# is executed or imported, and performs a network request via get_pet().
pet = get_pet('DAKOTA', 'BSA2038947')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment