Instantly share code, notes, and snippets.
Last active May 30, 2021
Full Example: Scrape Google Scholar Profile and Author Results with SerpApi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from serpapi import GoogleSearch | |
import os | |
def serpapi_scrape_profile_results_combo(query="samsung"):
    """Search Google Scholar profiles through SerpApi and collect author IDs.

    Sends one ``google_scholar_profiles`` request, prints a short summary of
    every profile found in the response, and returns the author IDs so they
    can be passed on to ``serpapi_scrape_author_result_combo``.

    The API key is read from the ``API_KEY`` environment variable.

    Args:
        query: Value sent as the ``mauthors`` search parameter. Defaults to
            ``"samsung"``, the value that used to be hard-coded.

    Returns:
        list: The ``author_id`` of each profile, in response order. Empty
        when the response contains no ``profiles`` key.

    Raises:
        KeyError: If a profile entry has no ``author_id``.
    """
    params = {
        "api_key": os.getenv("API_KEY"),
        "engine": "google_scholar_profiles",
        "hl": "en",
        "mauthors": query,
    }
    results = GoogleSearch(params).get_dict()

    author_ids = []
    # A response without a "profiles" key is treated as "no matches" rather
    # than an error.
    for result in results.get('profiles', []):
        # Everything except the author ID is optional and printed as None
        # when the profile does not provide it.
        name = result.get('name')
        email = result.get('email')
        author_id = result['author_id']
        affiliation = result.get('affiliations')
        cited_by = result.get('cited_by')

        # Only the first listed interest is reported; the [{}] fallback keeps
        # the lookups below safe for profiles that list no interests.
        listed_interests = result.get('interests') or [{}]
        interests = listed_interests[0].get('title')
        interests_link = listed_interests[0].get('link')

        author_ids.append(author_id)

        # Progress output only; delete this print if it is not needed.
        print(f'{name}\n{email}\n{author_id}\n{affiliation}\n{cited_by}\n{interests}\n{interests_link}\n')

    return author_ids
def _citation_metric(table, metric):
    """Return ``(all_time, recent)`` for *metric* from a cited-by table.

    *table* is a list of dicts; the row that contains the key *metric*
    (``'citations'``, ``'h_index'`` or ``'i10_index'``) is used. The "recent"
    value is read from whichever key starts with ``since_`` so the lookup
    does not depend on one particular year. Missing data yields ``None``.
    """
    for row in table:
        values = row.get(metric)
        if values is not None:
            recent = next(
                (count for key, count in values.items() if key.startswith('since_')),
                None,
            )
            return values.get('all'), recent
    return None, None


def serpapi_scrape_author_result_combo(profiles):
    """Fetch and print the Google Scholar author page of each given author.

    For every author ID one ``google_scholar_author`` request is sent through
    SerpApi, and the following sections of the response are printed: author
    info, articles, citation metrics, public access info, the citations
    graph and co-authors. Fields that are absent from a response are printed
    as ``None``; absent list sections (articles, graph, co-authors) print
    nothing.

    The API key is read from the ``API_KEY`` environment variable.

    Args:
        profiles: Iterable of author IDs, e.g. the list returned by
            ``serpapi_scrape_profile_results_combo``.

    Returns:
        None. All results are written to standard output.
    """
    # The key does not change between requests, so read it once.
    api_key = os.getenv("API_KEY")

    for author_id in profiles:
        params = {
            "api_key": api_key,
            "engine": "google_scholar_author",
            "author_id": author_id,
            "hl": "en",
        }
        results = GoogleSearch(params).get_dict()

        # Author info
        author = results.get('author', {})
        name = author.get('name')
        affiliations = author.get('affiliations')
        email = author.get('email')

        # Report the first two interests. Padding with None keeps the first
        # interest when an author lists only one.
        # Widen the padding and the slice to report more interests.
        titles = [entry.get('title') for entry in author.get('interests', [])]
        interests1, interests2 = (titles + [None, None])[:2]

        print('Author Info:')
        print(f'{name}\n{affiliations}\n{email}\n{interests1}\n{interests2}\n')

        # Articles Results
        for article in results.get('articles', []):
            article_title = article.get('title')
            article_link = article.get('link')
            article_authors = article.get('authors')
            article_publication = article.get('publication')
            article_citations = article.get('cited_by', {})
            cited_by = article_citations.get('value')
            cited_by_link = article_citations.get('link')
            article_year = article.get('year')

            print('Articles Info:')
            print(f"Title: {article_title}\nLink: {article_link}\nAuthors: {article_authors}\nPublication: {article_publication}\nCited by: {cited_by}\nCited by link: {cited_by_link}\nPublication year: {article_year}\n")

        # Cited By and Public Access Results
        author_citations = results.get('cited_by', {})
        table = author_citations.get('table', [])
        citations_all, citations_recent = _citation_metric(table, 'citations')
        h_index_all, h_index_recent = _citation_metric(table, 'h_index')
        i10_index_all, i10_index_recent = _citation_metric(table, 'i10_index')

        print('Citations Info:')
        print(f'{citations_all}\n{citations_recent}\n{h_index_all}\n{h_index_recent}\n{i10_index_all}\n{i10_index_recent}\n')

        public_access = results.get('public_access', {})
        public_access_link = public_access.get('link')
        public_access_available_articles = public_access.get('available')

        print('Public Access Info:')
        print(f'{public_access_link}\n{public_access_available_articles}\n')

        # Graph results
        for graph_point in author_citations.get('graph', []):
            year = graph_point.get('year')
            citations = graph_point.get('citations')
            print(f'{year} {citations}\n')

        # Co-Authors Results
        for co_author in results.get('co_authors', []):
            author_name = co_author.get('name')
            author_affiliations = co_author.get('affiliations')
            author_link = co_author.get('link')

            print('Co-Authour(s):')
            print(f'{author_name}\n{author_affiliations}\n{author_link}\n')
# Script entry: collect the author IDs from the profile search, then fetch
# and print the author page of each one.
profiles = serpapi_scrape_profile_results_combo()
serpapi_scrape_author_result_combo(profiles=profiles)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment