Skip to content

Instantly share code, notes, and snippets.

@Nannigalaxy
Last active June 14, 2021 12:46
Show Gist options
  • Save Nannigalaxy/789f5c008ed264edb7c186d4c1adc49c to your computer and use it in GitHub Desktop.
Save Nannigalaxy/789f5c008ed264edb7c186d4c1adc49c to your computer and use it in GitHub Desktop.
Image scraping script
'''
Image scraping script
Install libraries
$pip install requests bs4
'''
# import required modules
import requests # for get requests
from bs4 import BeautifulSoup as bs # for scraping
import os # for creating dirs & writing files
# Scrape the Unsplash search results page for each term below and save every
# downloadable <img> into images/<term>/<term>-<n>.jpg (n counts saved files).
Images = ['sedan', "suv", "hatchback", "sports car"]  # search terms, one sub-directory each

# Seconds to wait on any single HTTP request; requests has no default timeout,
# so without this a stalled server would hang the script forever.
REQUEST_TIMEOUT = 10

for img in Images:
    print("Image: ", img)
    search_url = 'https://unsplash.com/search/photos/' + img

    # Write through explicit paths instead of os.chdir(): the process working
    # directory stays untouched even if a download fails half-way.
    filePath = os.path.join('images', img)
    os.makedirs(filePath, exist_ok=True)

    # Download the results page; skip this term if the page itself cannot be
    # fetched, otherwise we would save images from an error page under its name.
    try:
        page = requests.get(search_url, timeout=REQUEST_TIMEOUT)
        page.raise_for_status()
    except requests.RequestException as err:
        print("Could not fetch search page:", err)
        continue

    soup = bs(page.text, 'html.parser')
    image_tags = soup.find_all('img')
    print("Image tags found: ", len(image_tags))

    saved = 0    # number of files written so far; also the filename suffix
    skipped = 0  # tags that had no usable src or failed to download/write
    for i, image in enumerate(image_tags):
        try:
            src = image['src']  # KeyError when the tag has no src attribute
            # Relative and data: URLs raise a RequestException subclass and
            # are skipped, as are connection errors and timeouts.
            response = requests.get(src, timeout=REQUEST_TIMEOUT)
            if response.status_code == 200:
                # NOTE: the .jpg extension is applied regardless of the real
                # content type returned by the server.
                target = os.path.join(filePath, img + '-' + str(saved) + '.jpg')
                with open(target, 'wb') as f:
                    # Reuse the body already downloaded instead of fetching
                    # the same URL a second time.
                    f.write(response.content)
                saved += 1
            else:
                skipped += 1
        except (KeyError, requests.RequestException, OSError):
            # Best-effort scrape: one bad tag must not abort the whole run,
            # but only expected failures are tolerated (Ctrl-C still works).
            skipped += 1
        print(i + 1, end=" ")  # running progress counter on a single line
    print()
    print("Saved:", saved, "Skipped:", skipped)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment