Last active
October 3, 2022 09:52
-
-
Save dimitryzub/12c9b70449cd8dd9c1184504e60af41e to your computer and use it in GitHub Desktop.
Scrape Google Images with Python and SerpApi web scraping library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def serpapi_get_google_images(
    queries=("Coffee", "boat", "skyrim", "minecraft"),
    output_dir="SerpApi_Images",
):
    """Collect original-size Google Images URLs via SerpApi and download them.

    For every query, result pages are requested one after another until the
    API response contains an "error" key or no image results. Unique
    "original" URLs are accumulated across all queries (first-seen order),
    downloaded into ``output_dir`` and finally printed as JSON together with
    their count.

    Args:
        queries: Iterable of search strings to look up.
        output_dir: Directory that receives the downloaded files; created
            if it does not exist.

    Returns:
        None. Results are written to disk and printed to stdout.
    """
    image_results = []  # unique URLs, in first-seen order
    seen_urls = set()   # O(1) duplicate check instead of scanning the list

    for query in queries:
        # search query parameters
        params = {
            "engine": "google",               # search engine. Google, Bing, Yahoo, Naver, Baidu...
            "q": query,                       # search query
            "tbm": "isch",                    # image results
            "num": "100",                     # number of images per page
            "ijn": 0,                         # page number: 0 -> first page, 1 -> second...
            "api_key": os.getenv("API_KEY"),  # your serpapi api key
            # other query parameters: hl (lang), gl (country), etc
        }

        search = GoogleSearch(params)  # where data extraction happens

        while True:
            results = search.get_dict()  # JSON -> Python dictionary

            # checks for "Google hasn't returned any results for this query."
            if "error" in results:
                print(results["error"])
                break

            images = results.get("images_results")
            if not images:
                # No error but nothing to read either: stop instead of
                # raising KeyError or paging forever.
                break

            for image in images:
                url = image.get("original")
                if url and url not in seen_urls:
                    seen_urls.add(url)
                    image_results.append(url)

            # update to the next page
            # NOTE(review): relies on GoogleSearch reading this same dict
            # object on the next get_dict() call -- confirm against the client.
            params["ijn"] += 1

    # -----------------------
    # Downloading images

    os.makedirs(output_dir, exist_ok=True)

    # The opener is global state for urllib.request; install it once rather
    # than once per image.
    opener = urllib.request.build_opener()
    opener.addheaders = [("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36")]
    urllib.request.install_opener(opener)

    # Iterate the collected URLs: the last `results` dict is the response
    # that ended pagination and does not describe the images gathered above.
    for index, url in enumerate(image_results, start=1):
        print(f"Downloading {index} image...")
        target = os.path.join(output_dir, f"original_size_img_{index}.jpg")
        try:
            urllib.request.urlretrieve(url, target)
        except (OSError, ValueError) as err:
            # One unreachable or malformed URL should not abort the batch.
            print(f"Failed to download {url}: {err}")

    print(json.dumps(image_results, indent=2))
    print(len(image_results))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment