Skip to content

Instantly share code, notes, and snippets.

@ma7555
Last active November 24, 2019 21:05
Show Gist options
  • Save ma7555/276aae9de64a717d9ec25b74bf72c3d9 to your computer and use it in GitHub Desktop.
Save ma7555/276aae9de64a717d9ec25b74bf72c3d9 to your computer and use it in GitHub Desktop.
import os
import time
import urllib.error
import urllib.request
from ast import literal_eval

import pandas as pd
import petpy
from tqdm import tqdm_notebook
def downloader(filename, image_url):
    """Download ``image_url`` and save it locally as ``filename`` + ``'.jpg'``.

    Args:
        filename: Destination path without extension; ``'.jpg'`` is appended.
        image_url: URL of the image to retrieve.

    Any exception raised by ``urllib.request.urlretrieve`` propagates to the
    caller unchanged.
    """
    urllib.request.urlretrieve(image_url, filename + '.jpg')
# Replace xx with API key and secret
pf = petpy.Petfinder(key='xx', secret='xx')
cats = pf.animals(results_per_page=100, pages=8000, return_df=True, animal_type='cat')

# Keep only cats with exactly one known breed (not mixed, not unknown, no
# secondary breed) ...
is_pure_breed = (
    ~cats['breeds.mixed']
    & ~cats['breeds.unknown']
    & cats['breeds.secondary'].isna()
    & cats['breeds.primary'].notna()
)
# ... that also have at least one photo. The truthiness test is spelled out
# instead of relying on an object-dtype column being coerced inside `&`.
has_photos = cats['photos'].map(
    lambda photos: isinstance(photos, (list, tuple)) and len(photos) > 0
)

kept_columns = ['id', 'url', 'type', 'age', 'gender',
                'size', 'coat', 'breeds.primary', 'photos']
# .copy() makes this an independent frame, so the column edits below do not
# operate on a slice of `cats` (avoids SettingWithCopyWarning).
pure_cats_w_photos = cats.loc[is_pure_breed & has_photos, kept_columns].copy()
pure_cats_w_photos = pure_cats_w_photos.rename(columns={'breeds.primary': 'breed'})

# Each entry of `photos` is a list of dicts; keep only the 'medium' URL of each.
pure_cats_w_photos['med_photos'] = pure_cats_w_photos['photos'].apply(
    lambda photos: [photo['medium'] for photo in photos]
)
# The breed doubles as a directory name, so it must not contain a '/'.
pure_cats_w_photos['breed'] = pure_cats_w_photos['breed'].str.replace('/', '-', regex=False)
pure_cats_w_photos.to_csv('cats.csv', index=False)

# To resume from a previous run without querying the API again, load the CSV
# instead of running the block above (list columns are stored as strings and
# must be parsed back):
# pure_cats_w_photos = pd.read_csv('cats.csv')
# pure_cats_w_photos['med_photos'] = pure_cats_w_photos.med_photos.apply(literal_eval)

# One output directory per breed.
for my_folder in pure_cats_w_photos['breed'].unique():
    os.makedirs(my_folder, exist_ok=True)

# Collect every .jpg already present below the working directory so that a
# re-run skips cats that were downloaded before.
downloaded_cats = [
    filename
    for _, _, filenames in os.walk('.')
    for filename in filenames
    if filename.endswith('.jpg')
]
# Files are named '<id>_<n>.jpg'; index the ids in a set so the check below is
# an exact O(1) match rather than a substring search over the whole list
# (which would treat id 123 as done when only 1234_0.jpg exists).
downloaded_ids = {filename.partition('_')[0] for filename in downloaded_cats}

for _, row in tqdm_notebook(pure_cats_w_photos.iterrows(), total=pure_cats_w_photos.shape[0]):
    cat_id = str(row['id'])
    if cat_id in downloaded_ids:
        continue
    # Number the photos per cat: using the row index here would give every
    # photo of a cat the same name, so each download would overwrite the last.
    for photo_no, photo_url in enumerate(row['med_photos']):
        # os.path.join keeps the '<breed>/<file>' layout portable; a literal
        # backslash only acts as a separator on Windows.
        target = os.path.join(row['breed'], '{id}_{n}'.format(id=cat_id, n=photo_no))
        try:
            downloader(target, photo_url)
        except urllib.error.HTTPError:
            # Best effort: skip the photo that failed and pause briefly
            # before requesting the next one.
            time.sleep(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment