Skip to content

Instantly share code, notes, and snippets.

View dimitryzub's full-sized avatar
🇺🇦
Grateful

Dmitiry Zub☀️ dimitryzub

🇺🇦
Grateful
View GitHub Profile
@dimitryzub
dimitryzub / scrape-google-images-python.py
Last active April 20, 2024 01:18
Web scraping all Google Images in Python
# Step-by-step blog post: https://serpapi.com/blog/scrape-google-images-with-python/
# There's an API solution with a video tutorial: https://www.youtube.com/watch?v=QuCPV6_GT6o
import os, requests, lxml, re, json, urllib.request
from bs4 import BeautifulSoup
from serpapi import GoogleSearch
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36"
}
@dimitryzub
dimitryzub / google-scholar-organic-cite-results-to-csv.py
Last active March 26, 2024 02:01
Web Scraping Google Scholar Organic, Cite Results to CSV with Python | SerpApi
# Video tutorial - https://www.youtube.com/watch?v=IXcycQwpFH0
# https://serpapi.com/google-scholar-api
from serpapi import GoogleSearch
from urllib.parse import urlsplit, parse_qsl
import pandas as pd
import os, json
def scrape_organic_results():
@dimitryzub
dimitryzub / researchgate-scrape-all-authors.py
Last active February 5, 2024 00:24
Scrape ResearchGate all Author, Researchers profiles in Python
# scraped url: https://www.researchgate.net/search/researcher?q=Coffee&page=1
# blog post:
from parsel import Selector
from playwright.sync_api import sync_playwright
import json
def scrape_researchgate_profile(query: str):
with sync_playwright() as p:
@dimitryzub
dimitryzub / google-scholar-author-results-serpapi.py
Created June 8, 2022 10:28
Scrape Google Scholar Author Profile and All Author Publications to CSV in Python | SerpApi
from serpapi import GoogleSearch
from urllib.parse import urlsplit, parse_qsl
import pandas as pd
import os, json
def scrape_google_scholar_author():
params = {
"api_key": os.getenv("API_KEY"), # SerpApi API key
"engine": "google_scholar_author", # author results search engine
@dimitryzub
dimitryzub / scrape_google_finance_ticker_python.py
Last active October 24, 2023 15:14
A script that scrapes Google Finance Ticker in Python - google.com/finance/quote/
import nasdaqdatalink
import requests, json, re
from parsel import Selector
from itertools import zip_longest
def scrape_google_finance(ticker: str):
params = {
"hl": "en" # language
}
@dimitryzub
dimitryzub / bs4_scrape_google_scholar_organic_results.py
Last active April 18, 2023 22:31
Scrape Google Scholar Organic Results with Python
from bs4 import BeautifulSoup
import requests, lxml, os, json
headers = {
'User-agent':
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
}
proxies = {
'http': os.getenv('HTTP_PROXY') # or just type proxy here without os.getenv()
@dimitryzub
dimitryzub / open_source_licenses.md
Created January 21, 2023 09:16 — forked from nicolasdao/open_source_licenses.md
What you need to know to choose an open source license.
# pip install google-search-results
import os, json
from serpapi import GoogleSearch
from urllib.parse import urlsplit, parse_qsl
def serpapi_scrape(query: str, website: str):
params = {
# https://docs.python.org/3/library/os.html#os.getenv
"api_key": os.getenv("API_KEY"), # your serpapi API key
"engine": "google_scholar", # search engine
# blog: https://serpapi.com/blog/scrape-google-scholar-papers-within-a-particular-conference-in-python/
from parsel import Selector
import requests, json, os
def check_sources(source: list or str):
if isinstance(source, str):
return source # NIPS
elif isinstance(source, list):
@dimitryzub
dimitryzub / scrape-google-images-python-serpapi.py
Last active October 3, 2022 09:52
Scrape Google Images with Python and SerpApi web scraping library
def serpapi_get_google_images():
image_results = []
for query in ["Coffee", "boat", "skyrim", "minecraft"]:
# search query parameters
params = {
"engine": "google", # search engine. Google, Bing, Yahoo, Naver, Baidu...
"q": query, # search query
"tbm": "isch", # image results