Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Full Example: Scrape Google Scholar Profile and Author Results with SerpApi
from serpapi import GoogleSearch
import os
def serpapi_scrape_profile_results_combo():
    """Search Google Scholar profiles through SerpApi and collect author IDs.

    Runs a ``google_scholar_profiles`` search for the hard-coded query
    ``"samsung"`` using the key read from the ``API_KEY`` environment
    variable, prints a short summary of every profile in the response and
    gathers each profile's ``author_id``.

    Returns:
        list: The ``author_id`` of every profile, in response order. Empty
        when the response has no ``profiles`` entry.

    Raises:
        KeyError: If a profile entry lacks ``name`` or ``author_id``.
    """
    params = {
        "api_key": os.getenv("API_KEY"),
        "engine": "google_scholar_profiles",
        "hl": "en",
        "mauthors": "samsung",
    }
    results = GoogleSearch(params).get_dict()

    author_ids = []

    # A response without "profiles" means there is nothing to scrape; return
    # an empty list instead of failing with KeyError.
    for profile in results.get("profiles") or []:
        # Identity fields are required: without an author_id the profile
        # cannot be used for the follow-up author lookup.
        name = profile["name"]
        author_id = profile["author_id"]

        # Display-only fields are optional; a missing one prints as None
        # rather than aborting the whole run.
        email = profile.get("email")
        affiliation = profile.get("affiliations")
        cited_by = profile.get("cited_by")

        # Only the first listed interest is reported. A profile with no
        # interests falls back to an empty dict so both lookups give None.
        first_interest = (profile.get("interests") or [{}])[0]
        interests = first_interest.get("title")
        interests_link = first_interest.get("link")

        author_ids.append(author_id)

        # Remove this print if console output is not needed.
        print(f'{name}\n{email}\n{author_id}\n{affiliation}\n{cited_by}\n{interests}\n{interests_link}\n')

    return author_ids
def _all_and_recent(metric):
    """Return ``(all_time, recent)`` values from one citation-metric dict.

    ``metric`` is a dict such as ``{"all": 10, "since_2016": 4}`` or ``None``.
    """
    if not metric:
        return None, None
    # NOTE(review): the original read the literal key 'since_2016'.
    # Presumably the suffix is the start year of the recent window and
    # changes over time, so accept any 'since_*' key -- confirm against the
    # SerpApi Google Scholar Author response schema.
    recent = next(
        (value for key, value in metric.items() if key.startswith("since_")),
        None,
    )
    return metric.get("all"), recent


def serpapi_scrape_author_result_combo(profiles):
    """Fetch and print Google Scholar author details for each author ID.

    For every ID in ``profiles`` a ``google_scholar_author`` search is sent
    through SerpApi (key read from the ``API_KEY`` environment variable) and
    the author info, articles, citation metrics, public-access info,
    citation graph and co-authors are printed.

    Optional parts of the response that are absent are printed as ``None``
    (single values) or skipped (lists) instead of raising.

    Args:
        profiles: Iterable of Google Scholar author IDs.

    Returns:
        None. All results are written to standard output.

    Raises:
        KeyError: If a response has no ``author`` entry at all.
    """
    for author_id in profiles:  # renamed from ``id`` to stop shadowing the builtin
        params = {
            "api_key": os.getenv("API_KEY"),
            "engine": "google_scholar_author",
            "author_id": author_id,
            "hl": "en",
        }
        results = GoogleSearch(params).get_dict()

        # Author info. The "author" entry itself is required so that a
        # response that is not an author payload still fails loudly.
        author = results['author']
        name = author.get('name')
        affiliations = author.get('affiliations')
        email = author.get('email')

        # Report the first two interests. Padding with None keeps the first
        # interest when only one exists (the original try/except reset both
        # to None in that case). Widen the slice to report more interests.
        interest_titles = [item.get('title') for item in author.get('interests') or []]
        interests1, interests2 = (interest_titles + [None, None])[:2]

        print('Author Info:')
        print(f'{name}\n{affiliations}\n{email}\n{interests1}\n{interests2}\n')

        # Articles results
        for article in results.get('articles') or []:
            article_title = article.get('title')
            article_link = article.get('link')
            article_authors = article.get('authors')
            article_publication = article.get('publication')
            article_citations = article.get('cited_by') or {}
            cited_by = article_citations.get('value')
            cited_by_link = article_citations.get('link')
            article_year = article.get('year')

            print('Articles Info:')
            print(f"Title: {article_title}\nLink: {article_link}\nAuthors: {article_authors}\nPublication: {article_publication}\nCited by: {cited_by}\nCited by link: {cited_by_link}\nPublication year: {article_year}\n")

        # Cited-by metrics. Each table row is a one-key dict
        # ({'citations': ...}, {'h_index': ...}, {'i10_index': ...}); merging
        # the rows allows lookup by name instead of by fixed position.
        cited_by_section = results.get('cited_by') or {}
        metrics = {}
        for row in cited_by_section.get('table') or []:
            metrics.update(row)

        citations_all, citations_recent = _all_and_recent(metrics.get('citations'))
        h_index_all, h_index_recent = _all_and_recent(metrics.get('h_index'))
        i10_index_all, i10_index_recent = _all_and_recent(metrics.get('i10_index'))

        print('Citations Info:')
        print(f'{citations_all}\n{citations_recent}\n{h_index_all}\n{h_index_recent}\n{i10_index_all}\n{i10_index_recent}\n')

        # Public access results
        public_access = results.get('public_access') or {}
        public_access_link = public_access.get('link')
        public_access_available_articles = public_access.get('available')

        print('Public Access Info:')
        print(f'{public_access_link}\n{public_access_available_articles}\n')

        # Graph results: absent graph data is skipped explicitly rather than
        # through a blanket ``except: pass``.
        for point in cited_by_section.get('graph') or []:
            year = point.get('year')
            citations = point.get('citations')

            print(f'{year} {citations}\n')

        # Co-author results: a co-author missing one field no longer
        # silently drops every remaining co-author.
        for co_author in results.get('co_authors') or []:
            author_name = co_author.get('name')
            author_affiliations = co_author.get('affiliations')
            author_link = co_author.get('link')

            print('Co-Authour(s):')
            print(f'{author_name}\n{author_affiliations}\n{author_link}\n')
# Script driver: first gather the author IDs from the profile search, then
# look up and print the full details of each of those authors.
profiles = serpapi_scrape_profile_results_combo()
serpapi_scrape_author_result_combo(profiles=profiles)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment