Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dimitryzub/4470ee581dec046d9e84fa3cfaf848df to your computer and use it in GitHub Desktop.
Save dimitryzub/4470ee581dec046d9e84fa3cfaf848df to your computer and use it in GitHub Desktop.
# pip install google-search-results
import os, json
from serpapi import GoogleSearch
from urllib.parse import urlsplit, parse_qsl
def serpapi_scrape(query: str, website: str):
    """Collect every Google Scholar result for ``query`` limited to ``website``.

    Pages through the SerpApi ``google_scholar`` engine, following the
    ``serpapi_pagination.next`` link until the response no longer provides
    one. The collected publications are printed as indented JSON and also
    returned.

    Args:
        query: Search terms.
        website: Domain appended to the query as a ``site:`` filter.

    Returns:
        A list of dicts, one per organic result, with the keys
        ``page_number``, ``position``, ``result_type``, ``title``, ``link``,
        ``result_id``, ``publication_info_summary`` and ``snippet``.
        Optional fields missing from a result are stored as ``None``.

    Raises:
        KeyError: If a result lacks ``position``, ``title`` or ``result_id``.
    """
    params = {
        # https://docs.python.org/3/library/os.html#os.getenv
        "api_key": os.getenv("API_KEY"),  # your serpapi API key
        "engine": "google_scholar",       # search engine
        "q": f"{query} site:{website}",   # search query
        "hl": "en",                       # language
        # "as_ylo": "2017",               # from 2017
        # "as_yhi": "2021",               # to 2021
        "start": "0",                     # first page
    }

    search = GoogleSearch(params)
    publications = []

    while True:
        results = search.get_dict()
        pagination = results.get("serpapi_pagination", {})
        page_number = pagination.get("current")

        print(f"Currently extracting page #{page_number}..")

        # A response without results (error payload, empty page) has no
        # "organic_results" key; treat it as an empty page instead of crashing.
        for result in results.get("organic_results", []):
            publications.append({
                "page_number": page_number,
                # NOTE(review): the +1 presumably converts a zero-based API
                # position into a one-based rank -- confirm against the API.
                "position": result["position"] + 1,
                "result_type": result.get("type"),
                "title": result["title"],
                "link": result.get("link"),
                "result_id": result["result_id"],
                "publication_info_summary": result.get("publication_info", {}).get("summary"),
                "snippet": result.get("snippet"),
            })

        next_page_url = pagination.get("next")
        if not next_page_url:
            # No further page: stop, otherwise the same page would be
            # requested forever.
            break

        # Split the "next" URL's query string into a dict and feed it back
        # into the GoogleSearch() parameters for the following request.
        search.params_dict.update(dict(parse_qsl(urlsplit(next_page_url).query)))

    print(json.dumps(publications, indent=2, ensure_ascii=False))
    return publications
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment