Skip to content

Instantly share code, notes, and snippets.

Avatar
💫

Dimitry Zub☀️ dimitryzub

💫
View GitHub Profile
@dimitryzub
dimitryzub / bs4_scrape_google_shopping_ads.py
Last active May 13, 2021
medium_scrape_google_shopping_ads
View bs4_scrape_google_shopping_ads.py
import requests, lxml, urllib.parse
from bs4 import BeautifulSoup
# Request headers carrying a desktop-browser User-Agent string, defined
# because the requests library's own default identifies as 'python-requests':
# https://github.com/psf/requests/blob/589c4547338b592b1fb77c65663d8aa6fbb7e38b/requests/utils.py#L808-L814
headers = {}
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582"
@dimitryzub
dimitryzub / bs4_scrape_google_ads.py
Last active May 13, 2021
medium_scrape_google_ads
View bs4_scrape_google_ads.py
import requests, lxml, urllib.parse
from bs4 import BeautifulSoup
# Header mapping with a browser User-Agent so the request resembles a real
# user's visit.
headers = {}
headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582"
# Search query
@dimitryzub
dimitryzub / serpapi_scrape_google_shopping_ads.py
Last active May 13, 2021
serpapi_scrape_google_shopping_ads
View serpapi_scrape_google_shopping_ads.py
import os
from serpapi import GoogleSearch
# Query parameters for the search: engine, query text, and the API key,
# which is read from the API_KEY environment variable (None when unset).
params = dict(
    engine="google",
    q="cpu buy",
    api_key=os.getenv("API_KEY"),
)
@dimitryzub
dimitryzub / serpapi_scrape_google_ads.py
Last active May 17, 2021
serpapi_scrape_google_ads
View serpapi_scrape_google_ads.py
import os
from serpapi import GoogleSearch
# Query parameters for the search; the API key is read from the API_KEY
# environment variable (None when unset).
params = dict(
    engine="google",
    q="kitchen table",
    api_key=os.getenv("API_KEY"),
    no_cache="true",  # the string "true", not a bool; add this param if it throws an error
)
@dimitryzub
dimitryzub / serpapi_google_scholar_cite_results.py
Created May 19, 2021
Scrape Google Scholar Cite Results with SerpApi
View serpapi_google_scholar_cite_results.py
from serpapi import GoogleSearch
import os
# Parameters for the "google_scholar_cite" engine. The API key comes from
# the API_KEY environment variable (None when unset).
# NOTE(review): "q" is presumably the id of a Scholar result - confirm.
params = dict(
    api_key=os.getenv("API_KEY"),
    engine="google_scholar_cite",
    q="FDc6HiktlqEJ",
)

# Build the search object from the parameters above.
search = GoogleSearch(params)
@dimitryzub
dimitryzub / serpapi_google_scholar_author_articles.py
Created May 19, 2021
Scrape Google Scholar Author Articles with SerpApi
View serpapi_google_scholar_author_articles.py
from serpapi import GoogleSearch
import os
# Parameters for the "google_scholar_author" engine: one author id, with
# "hl" set to "en". The API key is read from the API_KEY environment
# variable (None when unset).
params = dict(
    api_key=os.getenv("API_KEY"),
    engine="google_scholar_author",
    author_id="9PepYk8AAAAJ",
    hl="en",
)
@dimitryzub
dimitryzub / serpapi_google_scholar_author_results.py
Last active May 19, 2021
Scrape Google Scholar Author Results with SerpApi
View serpapi_google_scholar_author_results.py
from serpapi import GoogleSearch
import os
# Parameters for the "google_scholar_author" engine, starting from the API
# key (read from the API_KEY environment variable; None when unset) and
# then adding the engine, author id, and "hl" value.
params = {"api_key": os.getenv("API_KEY")}
params.update(
    engine="google_scholar_author",
    author_id="9PepYk8AAAAJ",
    hl="en",
)
@dimitryzub
dimitryzub / serpapi_google_scholar_authors_citedby_results.py
Last active May 19, 2021
Scrape Google Scholar Authors CitedBy Results with SerpApi
View serpapi_google_scholar_authors_citedby_results.py
from serpapi import GoogleSearch
import os, json
# Parameters for the "google_scholar_author" engine, filled in key by key.
# The API key is read from the API_KEY environment variable (None when unset).
params = {}
params["api_key"] = os.getenv("API_KEY")
params["engine"] = "google_scholar_author"
params["author_id"] = "9PepYk8AAAAJ"
params["hl"] = "en"
@dimitryzub
dimitryzub / python_scrape_google_scholar_co_authors_results.py
Last active May 20, 2021
Scrape Google Scholar Co-Authors Results with Python
View python_scrape_google_scholar_co_authors_results.py
from bs4 import BeautifulSoup
import requests, lxml, os
# Header mapping holding a desktop-browser User-Agent string.
# NOTE(review): presumably passed to requests; the call is not visible here.
headers = {}
headers['User-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
proxies = {
'http': os.getenv('HTTP_PROXY')
@dimitryzub
dimitryzub / python_scrape_google_scholar_author_co_authors_results.py
Created May 20, 2021
Scrape Google Scholar Co-Authors Results with Python
View python_scrape_google_scholar_author_co_authors_results.py
from bs4 import BeautifulSoup
import requests, lxml, os
# Header mapping holding a desktop-browser User-Agent string.
# NOTE(review): presumably passed to requests; the call is not visible here.
headers = {}
headers['User-agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
proxies = {
'http': os.getenv('HTTP_PROXY')