Skip to content

Instantly share code, notes, and snippets.

View dimitryzub's full-sized avatar
🇺🇦
Grateful

Dmitiry Zub☀️ dimitryzub

🇺🇦
Grateful
View GitHub Profile
from selenium import webdriver
import re, urllib.parse
driver = webdriver.Chrome(executable_path='path/to/chromedriver.exe')
driver.get('https://duckduckgo.com/?q=elon musk dogecoin&kl=us-en&ia=web')
for result in driver.find_elements_by_css_selector('.js-images-link'):
title = result.find_element_by_css_selector('.js-images-link a img').get_attribute('alt')
link = result.find_element_by_css_selector('.js-images-link a').get_attribute('href')
thumbnail_encoded = result.find_element_by_css_selector('.js-images-link a img').get_attribute('src')
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
driver = webdriver.Chrome(executable_path='path/to/chromedriver.exe')
driver.get('https://www.bing.com/videos/search?q=somebody+toucha+my+spaghet&FORM=HDRSC3&cc=us')
time.sleep(1)
# scrolls until "more videos" button is located
while True:
from bs4 import BeautifulSoup
import requests, lxml
headers = {
"User-Agent":
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}
params = {
"q": "somebody toucha my spaghet",
from selenium import webdriver
driver = webdriver.Chrome(executable_path='PATH/TO/chromedriver.exe')
# &iax=about - expanded knowledge graph
driver.get('https://duckduckgo.com/?q=elon musk&kl=us-en&ia=web&iax=about')
title = driver.find_element_by_css_selector('.module__title__link').text
try:
@dimitryzub
dimitryzub / brave_search_organic_results.py
Created October 26, 2021 08:26
Scrape Brave Search Organic Results using Python
from bs4 import BeautifulSoup
import requests, lxml, json
# HTTP headers: a single 'User-agent' entry carrying a desktop browser
# (Chrome/Edge on Windows 10) user-agent string.
# NOTE(review): presumably passed to a requests call that is not visible in
# this excerpt, so the request looks like it comes from a browser - confirm.
headers = {
'User-agent':
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
}
# URL query parameters: 'q' holds the search query text, 'source' is set to 'web'.
params = {'q': 'dune film', 'source': 'web'}
@dimitryzub
dimitryzub / scrape_naver_video_results_in_python.md
Last active April 4, 2022 11:27
Scrape Naver Video Results using Python and the SerpApi web-scraping library.

What will be scraped

image

Prerequisites

Basic knowledge of scraping with CSS selectors

If you haven't scraped with CSS selectors, there's a dedicated blog post of mine about how to use CSS selectors when web-scraping that covers what they are, their pros and cons, and why they matter from a web-scraping perspective.

@dimitryzub
dimitryzub / scrape_google_finance_ticker_python.py
Last active October 24, 2023 15:14
A script that scrapes Google Finance Ticker in Python - google.com/finance/quote/
import nasdaqdatalink
import requests, json, re
from parsel import Selector
from itertools import zip_longest
def scrape_google_finance(ticker: str):
params = {
"hl": "en" # language
}
@dimitryzub
dimitryzub / scrape_google_scholar_profiles_from_certain_university.py
Last active April 15, 2022 13:19
Scrapes all profiles from Google Scholar Profiles using pagination.
from parsel import Selector
import requests, re, json
def scrape_all_profiles_from_university(university_name: str):
# https://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
params = {
"view_op": "search_authors", # author results
"mauthors": university_name, # search query
"hl": "en", # language
from parsel import Selector
import requests, json, os
def check_websites(website: "list | str") -> "str | None":
    """Build the site-restriction part of a search query.

    Args:
        website: Either a single string, which is returned unchanged
            (e.g. ``"cabdirect.org"``), or a list of domains, each of which
            is prefixed with ``site:`` and joined with ``" OR "``.

    Returns:
        The input string as-is, or the joined ``site:`` expression for a
        list (an empty list yields ``""``). Any other input type yields
        ``None``, which matches the previous implicit fall-through.
    """
    # The previous annotation `list or str` is a boolean expression that
    # evaluates to plain `list`; the quoted union states both accepted types
    # without requiring a typing import or a newer interpreter.
    if isinstance(website, str):
        return website  # cabdirect.org
    if isinstance(website, list):
        # site:cabdirect.org OR site:cab.com
        return " OR ".join(f"site:{site}" for site in website)
    # Unsupported type: kept as an explicit None for backward compatibility.
    return None
# pip install google-search-results
import os, json
from serpapi import GoogleSearch
from urllib.parse import urlsplit, parse_qsl
def serpapi_scrape(query: str, website: str):
params = {
# https://docs.python.org/3/library/os.html#os.getenv
"api_key": os.getenv("API_KEY"), # your serpapi API key
"engine": "google_scholar", # search engine