Created
May 22, 2015 23:18
-
-
Save sean-duffy/37db93c8b42b67571cc1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import requests | |
from lxml import html | |
# Root URL of the RSPCA website; page paths are appended to it.
rspca_base = 'https://www.rspca.org.uk'
def _parse_at_a_glance(container):
    """Extract the labelled fields from the children of *container*.

    Each child's text is searched for ``Type: ``, ``Breed: ``, ``Colour: ``
    and ``Age: `` labels; the rest of the line after a label is the value.
    When a label occurs in several children, the last occurrence wins.

    Returns a dict keyed by label; labels that were never found map to None.
    """
    labels = ('Type', 'Breed', 'Colour', 'Age')
    found = dict.fromkeys(labels)
    for child in container:
        # Compute the text once per child rather than once per label.
        text = child.text_content()
        for label in labels:
            match = re.search(label + ': (.*)', text)
            if match is not None:
                found[label] = match.group(1).strip()
    return found


def get_pet(name, ref):
    """Fetch an RSPCA "find a pet" details page and scrape its key fields.

    Args:
        name: Animal name, inserted into the page URL as-is.
        ref: Animal reference code, inserted into the page URL as-is.

    Returns:
        A dict with the keys ``name``, ``ref``, ``main_image``, ``type``,
        ``breed``, ``colour``, ``age`` and ``description``. Any field that
        cannot be found on the page is None. ``description`` is not scraped
        yet and is always None.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            times out.
    """
    url = rspca_base + '/findapet/details/-/Animal/{0}/ref/{1}/rehome/'
    url = url.format(name, ref)

    # Without a timeout, requests can block forever on an unresponsive host.
    response = requests.get(url, timeout=30)
    tree = html.fromstring(response.content)

    images = tree.xpath("//img[@id='largeImage']")
    main_image = images[0].get('src') if images else None

    # A leading "//" is evaluated from the document root even when called on
    # an element, so ".//" is required to search inside the overview only.
    # Fall back to a document-wide search so pages where the block is not
    # nested inside the overview keep working.
    glance_blocks = []
    overviews = tree.xpath("//div[@class='petOverview']")
    if overviews:
        glance_blocks = overviews[0].xpath(".//div[@class='atAGlance']")
    if not glance_blocks:
        glance_blocks = tree.xpath("//div[@class='atAGlance']")

    # A page without the block yields None fields instead of an IndexError,
    # matching how a missing main image is handled.
    if glance_blocks:
        details = _parse_at_a_glance(glance_blocks[0])
    else:
        details = _parse_at_a_glance(())

    return {
        "name": name,
        "ref": ref,
        "main_image": main_image,
        "type": details['Type'],
        "breed": details['Breed'],
        "colour": details['Colour'],
        "age": details['Age'],
        "description": None,
    }
# Example lookup; note that this performs a live HTTP request whenever the
# module is run or imported.
pet = get_pet('DAKOTA', 'BSA2038947')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment