Skip to content

Instantly share code, notes, and snippets.

@dimitryzub
Last active August 4, 2021 08:38
Show Gist options
  • Save dimitryzub/5586a7015fcd058450bf68854548c4cb to your computer and use it in GitHub Desktop.
Save dimitryzub/5586a7015fcd058450bf68854548c4cb to your computer and use it in GitHub Desktop.
from selenium import webdriver
import urllib.parse, re
# Matches DuckDuckGo's image-proxy link inside an inline ``style`` attribute
# and captures the percent-encoded original image URL (the ``u`` query
# parameter). Stopping at the first ``&`` or ``"`` keeps the match independent
# of whatever sizing parameters (``f``, ``h``, ...) follow ``u``.
THUMBNAIL_RE = re.compile(r'external-content\.duckduckgo\.com/iu/\?u=([^&"]+)')


def decode_thumbnail_url(style):
    """Return the original image URL embedded in a carousel tile's style.

    Args:
        style: Value of the tile's ``style`` attribute, e.g.
            ``background-image: url("//external-content.duckduckgo.com/iu/?u=<encoded>&f=1&h=110");``.
            May be ``None`` or empty.

    Returns:
        The percent-decoded image URL, or ``''`` when ``style`` is empty or
        contains no DuckDuckGo proxy link.
    """
    if not style:
        return ''
    match = THUMBNAIL_RE.search(style)
    return urllib.parse.unquote(match.group(1)) if match else ''


def main():
    """Print title, link, source, date and thumbnail of each news carousel tile.

    Opens the DuckDuckGo results page for "elon musk" in Chrome and prints one
    record per element matching ``#m1-0 .has-image``. The browser is always
    closed, even if a lookup raises.
    """
    # urlencode escapes the space in the query instead of relying on the
    # browser to repair a malformed URL.
    search_url = 'https://duckduckgo.com/?' + urllib.parse.urlencode(
        {'q': 'elon musk', 'kl': 'us-en', 'ia': 'web'})

    # NOTE(review): executable_path and find_element(s)_by_css_selector are the
    # legacy Selenium API this script was written against; newer Selenium
    # releases may require Service(...) and find_element(By.CSS_SELECTOR, ...)
    # instead - confirm against the installed version.
    driver = webdriver.Chrome(executable_path='path/tochromedriver.exe')
    try:
        driver.get(search_url)
        for result in driver.find_elements_by_css_selector('#m1-0 .has-image'):
            # Title text and link live on the same anchor; look it up once.
            title_element = result.find_element_by_css_selector('#m1-0 .js-carousel-item-title')
            title = title_element.text.strip()
            link = title_element.get_attribute('href')
            source = result.find_element_by_css_selector('#m1-0 .result__url').text
            date = result.find_element_by_css_selector('#m1-0 .tile__time').text
            thumbnail_encoded = result.find_element_by_css_selector(
                '#m1-0 .module--carousel__image').get_attribute('style')
            thumbnail = decode_thumbnail_url(thumbnail_encoded)
            print(f'{title}\n{link}\n{source}\n{date}\n{thumbnail}\n')
    finally:
        # Without this, a failed lookup would leave Chrome/chromedriver running.
        driver.quit()


if __name__ == '__main__':
    main()
# ------------------- Example output -------------------
'''
Elon Musk admits Tesla's Cybertruck could flop
https://www.cnbc.com/2021/07/15/elon-musk-admits-the-cybertruck-could-flop.html
CNBC
4h
https://image.cnbcfm.com/api/v1/image/106261274-1574442599483rtx7a0ls.jpg?v=1574452686
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment