Dmitiry Zub☀️ dimitryzub

## duckduckgo_scrape_inline_images.py
from selenium import webdriver
import re, urllib.parse

# Locator constants for the generic find_element(s) API. The per-strategy
# helpers (find_element_by_css_selector, ...) were removed in Selenium 4.3,
# while find_element(By.CSS_SELECTOR, ...) works on both Selenium 3 and 4.
from selenium.webdriver.common.by import By

# Start Chrome through a local chromedriver binary and open the results page.
# NOTE(review): `executable_path` is itself deprecated in Selenium 4 - confirm
# the installed Selenium version still accepts it.
driver = webdriver.Chrome(executable_path='path/to/chromedriver.exe')
driver.get('https://duckduckgo.com/?q=elon musk dogecoin&kl=us-en&ia=web')

# Every '.js-images-link' element is handled as one inline image result.
for result in driver.find_elements(By.CSS_SELECTOR, '.js-images-link'):
    # Look the <img> up once; both the title and the thumbnail are read from it.
    image = result.find_element(By.CSS_SELECTOR, '.js-images-link a img')
    anchor = result.find_element(By.CSS_SELECTOR, '.js-images-link a')

    title = image.get_attribute('alt')
    link = anchor.get_attribute('href')
    thumbnail_encoded = image.get_attribute('src')

## scrape_all_bing_video_results.py
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome(executable_path='path/to/chromedriver.exe')
driver.get('https://www.bing.com/videos/search?q=somebody+toucha+my+spaghet&FORM=HDRSC3&cc=us')
time.sleep(1)

# scrolls until "more videos" button is located
while True:

## bing_bs4_scrape_first_10_video_results.py
from bs4 import BeautifulSoup
import requests, lxml

headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
}

params = {
    "q": "somebody toucha my spaghet",

## duckduckgo_scrape_knowledge_graph.py
from selenium import webdriver

driver = webdriver.Chrome(executable_path='PATH/TO/chromedriver.exe')
# &iax=about - expanded knowledge graph
driver.get('https://duckduckgo.com/?q=elon musk&kl=us-en&ia=web&iax=about')


title = driver.find_element_by_css_selector('.module__title__link').text

try:

## brave_search_organic_results.py
from bs4 import BeautifulSoup
import requests, lxml, json

headers = {
    'User-agent':
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
}

params = {'q': 'dune film', 'source': 'web'}

## scrape_naver_video_results_in_python.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                dimitryzub
                / scrape_naver_video_results_in_python.md
            
            
              Last active
              April 4, 2022 11:27
            
              
                Scrape Naver Video Results in Python and SerpApi web-scraping library.
              
          
    What will be scraped


Prerequisites

Basic knowledge of scraping with CSS selectors
If you haven't scraped with CSS selectors, there's a dedicated blog post of mine about how to use CSS selectors when web-scraping that covers what they are, their pros and cons, and why they matter from a web-scraping perspective.

  
## scrape_google_finance_ticker_python.py
import nasdaqdatalink
import requests, json, re
from parsel import Selector
from itertools import zip_longest


def scrape_google_finance(ticker: str):
    params = {
        "hl": "en" # language
        }

## scrape_google_scholar_profiles_from_certain_university.py
from parsel import Selector
import requests, re, json

def scrape_all_profiles_from_university(university_name: str):

    # https://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
    params = {
        "view_op": "search_authors",  # author results
        "mauthors": university_name,  # search query
        "hl": "en",                   # language

## google-scholar-papers-from-certain-website.py
from parsel import Selector
import requests, json, os


def check_websites(website: "list | str") -> "str | None":
    """Turn one domain or a list of domains into a search-query fragment.

    The previous annotation ``list or str`` was a boolean expression that
    evaluated to plain ``list``; the quoted union states the real contract
    without needing any extra import.

    :param website: a single domain as a string, or a list of domains.
    :return: the string unchanged when a string is passed; for a list, every
        entry prefixed with ``site:`` and joined with ``" OR "``; ``None``
        for any other type.
    """
    if isinstance(website, str):
        return website  # cabdirect.org

    if isinstance(website, list):
        # site:cabdirect.org OR site:cab.com
        return " OR ".join(f'site:{site}' for site in website)

    # Neither a string nor a list: keep the original implicit None result.
    return None

## google-scholar-papers-from-certain-website-serpapi.py
# pip install google-search-results
import os, json
from serpapi import GoogleSearch
from urllib.parse import urlsplit, parse_qsl

def serpapi_scrape(query: str, website: str):
    params = {
        # https://docs.python.org/3/library/os.html#os.getenv
        "api_key": os.getenv("API_KEY"), # your serpapi API key
        "engine": "google_scholar",      # search engine
	from selenium import webdriver
	import re, urllib.parse

	driver = webdriver.Chrome(executable_path='path/to/chromedriver.exe')
	driver.get('https://duckduckgo.com/?q=elon musk dogecoin&kl=us-en&ia=web')

	for result in driver.find_elements_by_css_selector('.js-images-link'):
	title = result.find_element_by_css_selector('.js-images-link a img').get_attribute('alt')
	link = result.find_element_by_css_selector('.js-images-link a').get_attribute('href')
	thumbnail_encoded = result.find_element_by_css_selector('.js-images-link a img').get_attribute('src')
	import time
	from selenium import webdriver
	from selenium.webdriver.common.keys import Keys

	driver = webdriver.Chrome(executable_path='path/to/chromedriver.exe')
	driver.get('https://www.bing.com/videos/search?q=somebody+toucha+my+spaghet&FORM=HDRSC3&cc=us')
	time.sleep(1)

	# scrolls until "more videos" button is located
	while True:
	from bs4 import BeautifulSoup
	import requests, lxml

	headers = {
	"User-Agent":
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"
	}

	params = {
	"q": "somebody toucha my spaghet",
	from selenium import webdriver

	driver = webdriver.Chrome(executable_path='PATH/TO/chromedriver.exe')
	# &iax=about - expanded knowledge graph
	driver.get('https://duckduckgo.com/?q=elon musk&kl=us-en&ia=web&iax=about')


	title = driver.find_element_by_css_selector('.module__title__link').text

	try:
	from bs4 import BeautifulSoup
	import requests, lxml, json

	headers = {
	'User-agent':
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
	}

	params = {'q': 'dune film', 'source': 'web'}
	import nasdaqdatalink
	import requests, json, re
	from parsel import Selector
	from itertools import zip_longest


	def scrape_google_finance(ticker: str):
	params = {
	"hl": "en" # language
	}
	from parsel import Selector
	import requests, re, json

	def scrape_all_profiles_from_university(university_name: str):

	# https://docs.python-requests.org/en/master/user/quickstart/#passing-parameters-in-urls
	params = {
	"view_op": "search_authors", # author results
	"mauthors": university_name, # search query
	"hl": "en", # language
	from parsel import Selector
	import requests, json, os


	def check_websites(website: "list | str") -> "str | None":
		"""Turn one domain or a list of domains into a search-query fragment.

		The body is re-indented so the function parses, and the annotation
		``list or str`` (which evaluated to plain ``list``) is replaced by a
		quoted union that needs no extra import.

		:param website: a single domain as a string, or a list of domains.
		:return: the string unchanged when a string is passed; for a list,
			every entry prefixed with ``site:`` and joined with ``" OR "``;
			``None`` for any other type.
		"""
		if isinstance(website, str):
			return website  # cabdirect.org

		if isinstance(website, list):
			# site:cabdirect.org OR site:cab.com
			return " OR ".join(f'site:{site}' for site in website)

		# Neither a string nor a list: no query fragment can be built.
		return None
	# pip install google-search-results
	import os, json
	from serpapi import GoogleSearch
	from urllib.parse import urlsplit, parse_qsl

	def serpapi_scrape(query: str, website: str):
	params = {
	# https://docs.python.org/3/library/os.html#os.getenv
	"api_key": os.getenv("API_KEY"), # your serpapi API key
	"engine": "google_scholar", # search engine