Skip to content

Instantly share code, notes, and snippets.

@Leumastai
Created November 8, 2021 01:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Leumastai/0b89d83ba7bd84388c774ee15b144f1a to your computer and use it in GitHub Desktop.
Save Leumastai/0b89d83ba7bd84388c774ee15b144f1a to your computer and use it in GitHub Desktop.
Script to scrape images from Google Images
# If running on Google Colab, remove the surrounding triple quotes and run the block below in its own notebook cell
""" %%capture
import sys
!pip install selenium
#!apt-get update # to update ubuntu to correctly run apt install
!apt install chromium-chromedriver
!cp /usr/lib/chromium-browser/chromedriver /usr/bin
sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver') """
import os
import re
import ssl
import sys
import time
import urllib.request, urllib.parse, urllib.error

import lxml
import requests
from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# --- Launch the browser and run the image search -----------------------------
# Path to the local chromedriver binary; change it to match your machine.
# NOTE(review): passing the path positionally works on Selenium 3.x / early 4.x;
# newer Selenium releases expect a Service object instead — confirm against the
# installed version.
CHROMEDRIVER_PATH = '/home/samuel/Downloads/Compressed/chromedriver'

# Change "cranes" to the object you wish to search for.
SEARCH_TERM = "cranes"

driver = webdriver.Chrome(CHROMEDRIVER_PATH)

# Start on the Google Images home page so that submitting the query lands
# directly on the image results; no separate "Images" tab click is needed.
driver.get("https://www.google.com/imghp")

# The search box is located by its form field name ("q") rather than by a
# page-specific id.
box = driver.find_element(By.NAME, "q")
box.send_keys(SEARCH_TERM)
box.send_keys(Keys.ENTER)
# --- Scroll until the results page stops growing -----------------------------
# Each pass scrolls to the bottom, waits for new content, and tries to click
# the input element addressed by SHOW_MORE_XPATH (presumably the "show more
# results" button — the XPath is kept exactly as the script had it). The loop
# ends once a full pass leaves the document height unchanged.
SHOW_MORE_XPATH = '//*[@id="islmp"]/div/div/div/div/div[5]/input'
SCROLL_PAUSE_SECONDS = 2

last_height = driver.execute_script('return document.body.scrollHeight')
while True:
    driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(SCROLL_PAUSE_SECONDS)

    try:
        driver.find_element(By.XPATH, SHOW_MORE_XPATH).click()
    except WebDriverException:
        # Best effort: the element is absent or not clickable on most passes.
        # Only Selenium errors are ignored, so Ctrl-C still stops the script.
        pass
    else:
        # Give the newly requested results time to render.
        time.sleep(SCROLL_PAUSE_SECONDS)

    # Measure AFTER the click attempt so that content loaded by the click
    # counts as growth instead of being followed by an immediate exit.
    new_height = driver.execute_script('return document.body.scrollHeight')
    if new_height == last_height:
        break
    last_height = new_height
# --- Save a screenshot of every result thumbnail -----------------------------
# Change the path to save the images.
OUTPUT_DIR = '/home/samuel/images/crane'

# Create the target directory up front; without it every screenshot would
# fail and nothing would be written.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for i in range(1, 1000):
    # Position i inside the results grid; positions with no matching <img>
    # are skipped, so file numbering follows the grid position.
    thumbnail_xpath = '//*[@id="islrg"]/div[1]/div[' + str(i) + ']/a[1]/div[1]/img'
    target_path = os.path.join(OUTPUT_DIR, 'crane_' + str(i) + '.png')
    try:
        driver.find_element(By.XPATH, thumbnail_xpath).screenshot(target_path)
    except WebDriverException:
        # No thumbnail at this position (or it could not be captured): move on.
        # Only Selenium errors are ignored, so Ctrl-C still stops the script.
        continue
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment