Skip to content

Instantly share code, notes, and snippets.

@shaddyshad
Created May 1, 2020 09:38
Show Gist options
  • Save shaddyshad/a975080b20c0b341d10fa0074352ba2e to your computer and use it in GitHub Desktop.
Save shaddyshad/a975080b20c0b341d10fa0074352ba2e to your computer and use it in GitHub Desktop.
Extract all images from a website url
import re
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
def extract_images(site, timeout=30):
    """Download the jpg/gif/png images referenced by ``<img>`` tags on a page.

    Fetches ``site``, parses the HTML, and for every ``<img>`` tag that has a
    ``src`` attribute downloads the image into the current working directory.
    Each file is named after the last path component of the image URL.

    Args:
        site: URL of the page to scan. Relative image URLs are resolved
            against it.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. Downloaded and skipped URLs are reported with ``print``.

    Raises:
        requests.RequestException: If a request fails at the connection
            level or times out.
    """
    # Compiled once instead of on every loop iteration. Matches a trailing
    # "/<name>.<ext>" and captures the file name in group 1.
    pattern = re.compile(r'/([\w_-]+[.](jpg|gif|png))$')

    response = requests.get(site, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Tags without a usable src attribute are skipped instead of raising
    # KeyError.
    urls = [img['src'] for img in soup.find_all('img') if img.get('src')]

    for url in urls:
        # urljoin resolves relative and protocol-relative URLs correctly and
        # leaves absolute URLs untouched.
        url = urljoin(site, url)
        parts = urlsplit(url)
        if parts.scheme not in ('http', 'https'):
            # e.g. inline "data:" images, which cannot be fetched over HTTP.
            print("No filename {}".format(url))
            continue

        # Match against the path only so a query string or fragment does not
        # hide the file extension.
        filename = pattern.search(parts.path)
        if not filename:
            print("No filename {}".format(url))
            continue

        print(url)
        image = requests.get(url, timeout=timeout)
        if not image.ok:
            # Do not save an HTTP error page under an image file name.
            print("Download failed ({}) {}".format(image.status_code, url))
            continue

        # Open the file only after a successful download so a failed request
        # never leaves an empty file behind.
        with open(filename.group(1), 'wb') as f:
            f.write(image.content)
if __name__ == "__main__":
    import sys

    # Scan the URL given as the first command-line argument; with no
    # argument, fall back to the original demo site.
    site = sys.argv[1] if len(sys.argv) > 1 else "https://heartofvegasslots.com/#="
    extract_images(site)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment