Skip to content

Instantly share code, notes, and snippets.

Avatar
💫

Dimitry Zub☀️ dimitryzub

💫
View GitHub Profile
@dimitryzub
dimitryzub / bs4_scrape_google_shopping_ads.py
Last active May 13, 2021
medium_scrape_google_shopping_ads
View bs4_scrape_google_shopping_ads.py
import requests, lxml, urllib.parse
from bs4 import BeautifulSoup
# Adding User-agent (default user-agent from requests library is 'python-requests')
# https://github.com/psf/requests/blob/589c4547338b592b1fb77c65663d8aa6fbb7e38b/requests/utils.py#L808-L814
# Request headers carrying a desktop-browser User-Agent string, used instead
# of the requests library default.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582"
    ),
}
@dimitryzub
dimitryzub / bs4_scrape_google_ads.py
Last active May 13, 2021
medium_scrape_google_ads
View bs4_scrape_google_ads.py
import requests, lxml, urllib.parse
from bs4 import BeautifulSoup
# Adding user-agent to fake real user visit
# Single-entry header mapping: a browser User-Agent so the request resembles
# a real user visit.
headers = dict(
    [
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582",
        ),
    ]
)
# Search query
@dimitryzub
dimitryzub / serpapi_scrape_google_shopping_ads.py
Last active May 13, 2021
serpapi_scrape_google_shopping_ads
View serpapi_scrape_google_shopping_ads.py
import os
from serpapi import GoogleSearch
# Search query parameters
# Query parameters for the search: engine, query text, and the API key read
# from the API_KEY environment variable (None when it is not set).
params = dict(
    engine="google",
    q="cpu buy",
    api_key=os.environ.get("API_KEY"),
)
@dimitryzub
dimitryzub / serpapi_scrape_google_ads.py
Last active May 17, 2021
serpapi_scrape_google_ads
View serpapi_scrape_google_ads.py
import os
from serpapi import GoogleSearch
# Search parameters; the API key is read from the API_KEY environment variable.
params = {
    "engine": "google",
    "q": "kitchen table",
    "api_key": os.environ.get("API_KEY"),
    # Include this parameter if the request throws an error.
    "no_cache": "true",
}
@dimitryzub
dimitryzub / bs4_scrape_google_scholar_organic_results.py
Last active May 22, 2021
Scrape Google Scholar Organic Results with Python
View bs4_scrape_google_scholar_organic_results.py
from bs4 import BeautifulSoup
import requests, lxml, os, json
# Header mapping with a desktop-browser User-Agent string.
headers = {
    'User-agent': (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582"
    ),
}
proxies = {
'http': os.getenv('HTTP_PROXY') # or just type proxy here without os.getenv()
@dimitryzub
dimitryzub / serpapi_scrape_google_scholar_organic_results.py
Last active May 20, 2021
Scrape Google Scholar Organic Results with SerpApi
View serpapi_scrape_google_scholar_organic_results.py
from serpapi import GoogleSearch
import os, json
# Parameters for a Google Scholar query; the API key is read from the API_KEY
# environment variable (None when unset).
params = dict(
    api_key=os.environ.get("API_KEY"),
    engine="google_scholar",
    q="samsung",
)
# Instantiate SerpApi's GoogleSearch with the query parameters defined above.
search = GoogleSearch(params)
@dimitryzub
dimitryzub / scrape_google_scholar_profile_results.py
Last active May 23, 2021
Scrape Google Scholar Profile Results with Python
View scrape_google_scholar_profile_results.py
from bs4 import BeautifulSoup
import requests, lxml, os
# One-entry header mapping that supplies a browser User-Agent string.
headers = dict(
    [
        (
            'User-agent',
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582",
        ),
    ]
)
proxies = {
'http': os.getenv('HTTP_PROXY')
@dimitryzub
dimitryzub / serpapi_scrape_google_scholar_profile_results.py
Last active May 23, 2021
Scrape Google Scholar Profile Results using SerpApi
View serpapi_scrape_google_scholar_profile_results.py
from serpapi import GoogleSearch
import os
# Parameters for a Google Scholar Profiles query: API key from the API_KEY
# environment variable, engine name, interface language, and author search term.
params = dict(
    api_key=os.environ.get("API_KEY"),
    engine="google_scholar_profiles",
    hl="en",
    mauthors="samsung",
)
@dimitryzub
dimitryzub / python_scrape_google_scholar_profile_author_results.py
Last active May 29, 2021
Scrape Google Scholar Profile-Author Results with Python
View python_scrape_google_scholar_profile_author_results.py
from bs4 import BeautifulSoup
import requests, lxml, os, json
# Headers dict holding only a desktop-browser User-Agent value.
headers = {
    'User-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582",
}
proxies = {
'http': os.getenv('HTTP_PROXY')
@dimitryzub
dimitryzub / serpapi_google_scholar_cite_results.py
Created May 19, 2021
Scrape Google Scholar Cite Results with SerpApi
View serpapi_google_scholar_cite_results.py
from serpapi import GoogleSearch
import os
# Parameters for a Google Scholar Cite lookup; "q" holds the identifier passed
# to the google_scholar_cite engine, and the API key comes from the API_KEY
# environment variable.
params = dict(
    api_key=os.environ.get("API_KEY"),
    engine="google_scholar_cite",
    q="FDc6HiktlqEJ",
)
# Instantiate SerpApi's GoogleSearch with the cite-lookup parameters defined above.
search = GoogleSearch(params)