In this post on the fastai forums, we discussed the best way to get more than 150 images from the Bing Search API. We came up with a solution that modifies utils.py
as follows:
# +
# pip install azure-cognitiveservices-search-imagesearch
import logging
from azure.cognitiveservices.search.imagesearch import ImageSearchClient as api
from msrest.authentication import CognitiveServicesCredentials as auth
def search_images_bing(key, term, total_count=150, min_sz=128):
    """Search for images using the Bing API, paging past the per-request limit.

    A single request asks for at most 150 images, so the search is issued in
    pages at offsets 0, 150, 300, ... until ``total_count`` images have been
    requested. Paging stops as soon as a page comes back empty, so no further
    requests are spent once Bing has run out of results. A warning is logged
    when fewer than ``total_count`` images were collected.

    NOTE(review): ``L`` is not imported in the visible snippet; it is expected
    to already be in scope (presumably fastcore's ``L``) -- confirm.

    :param key: Your Bing API key
    :type key: str
    :param term: The search term to search for
    :type term: str
    :param total_count: The total number of images you want to return (default is 150)
    :type total_count: int
    :param min_sz: the minimum height and width of the images to search for (default is 128)
    :type min_sz: int
    :returns: An L-collection of ImageObject
    :rtype: L
    """
    max_count = 150  # largest page size requested from Bing in one call
    client = api("https://api.cognitive.microsoft.com", auth(key))

    pages = []
    for offset in range(0, total_count, max_count):
        # The last page may be partial: only ask for what is still missing.
        count = min(max_count, total_count - offset)
        page = client.images.search(
            query=term,
            min_height=min_sz,
            min_width=min_sz,
            count=count,
            offset=offset,
        ).value
        if not page:
            # Nothing at this offset, so larger offsets will be empty too;
            # stop here instead of paying for more requests.
            break
        pages.append(page)

    # Flatten the list of pages into a single L of results.
    imgs = L(pages).concat()

    if len(imgs) < total_count:
        logging.warning(
            "Bing only found %s images for '%s'. Total requested was %s.",
            len(imgs),
            term,
            total_count,
        )
    return imgs
Note: Bing may return duplicate images with the same URLs. To remove duplicate URLs, try something like:
# Ask for up to 1000 "grizzly bear" images (`key` is your Bing API key).
results = search_images_bing(key, "grizzly bear", 1000)
# Take each result's `content_url` and keep only the distinct URLs.
urls = results.attrgot("content_url").unique()