Skip to content

Instantly share code, notes, and snippets.

@k-funk
Last active November 2, 2022 13:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save k-funk/b71d6a685201b96f50fe0a83c0e97aa6 to your computer and use it in GitHub Desktop.
Save k-funk/b71d6a685201b96f50fe0a83c0e97aa6 to your computer and use it in GitHub Desktop.
Scraping Google Photos Public Album
# inspired by https://medium.com/p/d49f037c8e3c/responses/show (hopefully the regex is updated there when this one breaks)
# also exists as a django-cms plugin at https://github.com/k-funk/djangocms-scrape-google-photos-album
import logging
import re
import requests
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Matches image URLs hosted on lh3.googleusercontent.com whose identifier is
# at least 128 characters long. An earlier version of this pattern required a
# minimum of 139 characters; the real bound on the identifier length is not
# known.
REGEX = (
    r"(https:\/\/lh3\.googleusercontent\.com\/"
    r"[a-zA-Z0-9\-_]{128,})"
)
def get_photos_from_html(html):
    """Return the photo URLs found in the HTML of an album page.

    Every match of ``REGEX`` in *html* is collected; the first and the last
    match are dropped because they are the album cover, not album photos.
    """
    matches = re.findall(REGEX, html)
    # Strip the album cover, which shows up as the first and last match.
    return matches[1:-1]
def get_photo_urls(album_url, timeout=10):
    """Scrape a public Google Photos album and return its photo URLs.

    This is a best-effort helper: any failure (network error, HTTP error
    status, no photos in the page) is logged and reported as an empty list,
    so callers never have to handle an exception.

    Args:
        album_url: URL of the public album page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may wait indefinitely for a response.

    Returns:
        A list of photo URL strings, reversed relative to their order in the
        page source so they appear the way they do on the website, or ``[]``
        if scraping failed or no photos were found.
    """
    logger.info('Scraping Google Photos album at: %s', album_url)

    try:
        response = requests.get(album_url, timeout=timeout)
        # Surface 4xx/5xx responses as the real cause instead of letting an
        # error page be reported as "No photos found."
        response.raise_for_status()
        photo_urls = get_photos_from_html(response.text)
    except Exception as err:
        # Deliberate catch-all: this function is the error boundary for the
        # scrape and must return [] rather than propagate a failure.
        logger.error('Google Photos scraping failed:\n%s', err)
        return []

    if not photo_urls:
        logger.error('Google Photos scraping failed:\n%s', 'No photos found.')
        return []

    logger.info("# of images: %s", len(photo_urls))
    photo_urls.reverse()  # makes the order appear the way it does on the website
    return photo_urls
if __name__ == "__main__":
    # Placeholder share link; replace it with a real public album URL.
    example_album_url = 'https://photos.app.goo.gl/...'
    print(get_photo_urls(example_album_url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment