Skip to content

Instantly share code, notes, and snippets.

@skipperkongen
Last active February 7, 2019 13:35
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save skipperkongen/06df22d8fb3ec4028aaa92996e6abb2e to your computer and use it in GitHub Desktop.
Save skipperkongen/06df22d8fb3ec4028aaa92996e6abb2e to your computer and use it in GitHub Desktop.
Scrape images from Google Images
# pip install icrawler
import argparse
import os

from icrawler.builtin import GoogleImageCrawler
def build_parser():
    """Create the command-line parser for the image scraper.

    Returns:
        argparse.ArgumentParser: a parser that accepts one or more
        positional keywords plus options for the per-keyword download
        limit, the minimum image width/height and the output directory.
    """
    parser = argparse.ArgumentParser(description='Scrape some images.')
    parser.add_argument(
        'keywords', metavar='KEYWORDS', nargs='+',
        help='keywords to download images for',
    )
    parser.add_argument(
        '-n', '--max-num', type=int, default=10,
        help='Maximum number of images to download for each keyword',
    )
    parser.add_argument(
        '-W', '--min-width', type=int, default=200,
        help='Minimum width of images in pixels',
    )
    # '-H' (upper case) is used because argparse reserves '-h' for --help.
    parser.add_argument(
        '-H', '--min-height', type=int, default=200,
        help='Minimum height of images in pixels',
    )
    parser.add_argument(
        '-d', '--directory', default='images',
        help='Directory in which to store images',
    )
    return parser


def main(argv=None):
    """Crawl Google Images once per keyword given on the command line.

    Args:
        argv: Optional list of argument strings. When ``None``, argparse
            falls back to ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    for keyword in args.keywords:
        # A new crawler is built for every keyword because each keyword is
        # stored in its own subdirectory of the chosen output directory.
        google_crawler = GoogleImageCrawler(
            parser_threads=2,
            downloader_threads=4,
            storage={'root_dir': os.path.join(args.directory, keyword)},
        )
        google_crawler.crawl(
            keyword=keyword,
            max_num=args.max_num,
            min_size=(args.min_width, args.min_height),
        )


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment