Skip to content

Instantly share code, notes, and snippets.

@dimitryzub
Last active October 3, 2022 09:52
Show Gist options
  • Save dimitryzub/12c9b70449cd8dd9c1184504e60af41e to your computer and use it in GitHub Desktop.
Save dimitryzub/12c9b70449cd8dd9c1184504e60af41e to your computer and use it in GitHub Desktop.
Scrape Google Images with Python and SerpApi web scraping library
def serpapi_get_google_images():
    """Collect original-size Google Images URLs via SerpApi and download them.

    For each hard-coded query, result pages are requested one after another
    until the response contains an "error" key (or no images at all). Every
    unique "original" URL is gathered, in first-seen order, across all
    queries. The gathered URLs are then downloaded into the
    ``SerpApi_Images/`` directory as ``original_size_img_<index>.jpg``, and
    finally the URL list and its length are printed.

    The SerpApi key is read from the ``API_KEY`` environment variable.
    Download errors raised by ``urllib`` are not caught and propagate to the
    caller.
    """
    image_results = []   # unique image URLs, in first-seen order
    seen_urls = set()    # O(1) membership companion to image_results

    for query in ["Coffee", "boat", "skyrim", "minecraft"]:
        # search query parameters
        params = {
            "engine": "google",               # search engine. Google, Bing, Yahoo, Naver, Baidu...
            "q": query,                       # search query
            "tbm": "isch",                    # image results
            "num": "100",                     # number of images per page
            "ijn": 0,                         # page number: 0 -> first page, 1 -> second...
            "api_key": os.getenv("API_KEY"),  # your serpapi api key
            # other query parameters: hl (lang), gl (country), etc
        }

        search = GoogleSearch(params)         # where data extraction happens

        while True:
            results = search.get_dict()       # JSON -> Python dictionary

            # checks for "Google hasn't returned any results for this query."
            if "error" in results:
                print(results["error"])
                break

            images = results.get("images_results")
            if not images:
                # Neither an error nor any images: nothing more to page through.
                break

            for image in images:
                url = image.get("original")
                if url and url not in seen_urls:
                    seen_urls.add(url)
                    image_results.append(url)

            # update to the next page
            # NOTE(review): relies on `search` reading this same dict on the
            # next get_dict() call -- confirm against the SerpApi client.
            params["ijn"] += 1

    # -----------------------
    # Downloading images

    if image_results:
        # urlretrieve does not create missing directories.
        os.makedirs("SerpApi_Images", exist_ok=True)

        # Install the browser-like User-Agent once instead of per image.
        opener = urllib.request.build_opener()
        opener.addheaders = [("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36")]
        urllib.request.install_opener(opener)

        # Iterate the accumulated URLs: the last `results` is the terminating
        # response and has no usable "images_results" entry.
        for index, url in enumerate(image_results, start=1):
            print(f"Downloading {index} image...")
            urllib.request.urlretrieve(url, f"SerpApi_Images/original_size_img_{index}.jpg")

    print(json.dumps(image_results, indent=2))
    print(len(image_results))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment