Skip to content

Instantly share code, notes, and snippets.

@dimitryzub
Created July 16, 2021 14:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dimitryzub/32ea63f9c6ed8c61e609af8e85bc22c2 to your computer and use it in GitHub Desktop.
Save dimitryzub/32ea63f9c6ed8c61e609af8e85bc22c2 to your computer and use it in GitHub Desktop.
from selenium import webdriver
import re, urllib.parse
# Matches an external-content.duckduckgo.com thumbnail URL and captures the
# percent-encoded value of its ``u`` query parameter (everything up to the
# last ``&f=1``).
# https://regex101.com/r/4pgG5m/1
THUMBNAIL_PROXY_RE = re.compile(r'https\:\/\/external\-content\.duckduckgo\.com\/iu\/\?u\=(.*)&f=1')


def extract_thumbnail_url(thumbnail_encoded):
    """Return the decoded URL carried inside a DuckDuckGo thumbnail ``src``.

    Args:
        thumbnail_encoded: Value of an ``<img>`` ``src`` attribute, or
            ``None`` when the attribute is missing.

    Returns:
        The percent-decoded ``u`` parameter of the thumbnail URL with any
        ``&h=160`` fragment removed, or ``''`` when the input is empty or
        does not match the expected URL shape.
    """
    # get_attribute() yields None for a missing attribute; treat that like
    # "no thumbnail" instead of letting the regex raise TypeError.
    if not thumbnail_encoded:
        return ''
    match = THUMBNAIL_PROXY_RE.search(thumbnail_encoded)
    if match is None:
        return ''
    # https://www.kite.com/python/answers/how-to-decode-a-utf-8-url-in-python
    return urllib.parse.unquote(match.group(1)).replace('&h=160', '')


def main():
    """Open a DuckDuckGo search and print data for each image result.

    For every element matching ``.js-images-link`` this prints the image
    ``alt`` text, the anchor ``href``, the decoded thumbnail URL and the
    element's ``data-id`` attribute, followed by a blank line.
    """
    # NOTE(review): `executable_path` and `find_element(s)_by_css_selector`
    # are Selenium 3-style APIs, kept here to match the rest of the file;
    # they are deprecated in Selenium 4 -- confirm the installed version.
    driver = webdriver.Chrome(executable_path='path/to/chromedriver.exe')
    try:
        driver.get('https://duckduckgo.com/?q=elon musk dogecoin&kl=us-en&ia=web')
        for result in driver.find_elements_by_css_selector('.js-images-link'):
            # Look the <img> up once; both `alt` and `src` are read from it.
            img = result.find_element_by_css_selector('.js-images-link a img')
            title = img.get_attribute('alt')
            link = result.find_element_by_css_selector('.js-images-link a').get_attribute('href')
            thumbnail = extract_thumbnail_url(img.get_attribute('src'))
            image = result.get_attribute('data-id')
            print(f'{title}\n{link}\n{thumbnail}\n{image}\n')
    finally:
        # Always shut the browser down, even if an element lookup raised.
        driver.quit()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment