|
import os
import sys
from typing import Any, Dict, Iterable, List, Optional, Tuple

from serpapi import GoogleSearch
|
|
|
def _first_interest(profile: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
    """Return ``(title, link)`` of the profile's first interest, or ``(None, None)``."""
    interests = profile.get('interests') or []
    if not interests:
        return None, None
    return interests[0].get('title'), interests[0].get('link')


def serpapi_scrape_profile_results_combo() -> List[str]:
    """Search the ``google_scholar_profiles`` engine and collect author IDs.

    Runs one search for ``mauthors="samsung"`` using the key stored in the
    ``API_KEY`` environment variable, prints the details of every profile in
    the response, and gathers each profile's ``author_id``.

    Optional fields (email, interests, ...) that a profile does not carry are
    printed as ``None`` instead of aborting the whole run.

    Returns:
        The author IDs in response order; an empty list when the response
        contains no ``profiles`` entry.
    """
    params = {
        "api_key": os.getenv("API_KEY"),
        "engine": "google_scholar_profiles",
        "hl": "en",
        "mauthors": "samsung",
    }

    results = GoogleSearch(params).get_dict()

    found = results.get('profiles')
    if not found:
        # NOTE(review): failures (bad key, no results) appear to be reported
        # under a top-level "error" key - confirm against the SerpApi docs.
        print(f"No profiles returned: {results.get('error')}", file=sys.stderr)
        return []

    author_ids = []
    for result in found:
        author_id = result.get('author_id')
        if author_id is None:
            # A profile without an ID cannot be looked up afterwards.
            continue
        author_ids.append(author_id)

        name = result.get('name')
        email = result.get('email')
        affiliation = result.get('affiliations')
        cited_by = result.get('cited_by')
        interests, interests_link = _first_interest(result)

        # Remove this print if the console output is not needed.
        print(f'{name}\n{email}\n{author_id}\n{affiliation}\n{cited_by}\n{interests}\n{interests_link}\n')

    return author_ids
|
|
|
|
|
def _all_and_recent(metric: Dict[str, Any]) -> Tuple[Any, Any]:
    """Return the ``all`` value and the value stored under ``since_<year>``."""
    # NOTE(review): the key name embeds a year ("since_2016" in the original
    # code) that presumably moves forward over time - confirm against the
    # SerpApi docs. Looking it up by prefix avoids hard-coding that year.
    recent_key = next((key for key in metric if str(key).startswith('since_')), None)
    return metric.get('all'), metric.get(recent_key)


def _print_author_info(author: Dict[str, Any]) -> None:
    """Print name, affiliations, email and the first two interests of an author."""
    titles = [interest.get('title') for interest in author.get('interests') or []]
    # Pad with None so an author with zero or one interest still keeps what
    # is there. Widen the slice (and the print) to show more interests.
    interests1, interests2 = (titles + [None, None])[:2]

    name = author.get('name')
    affiliations = author.get('affiliations')
    email = author.get('email')

    print('Author Info:')
    print(f'{name}\n{affiliations}\n{email}\n{interests1}\n{interests2}\n')


def _print_articles(articles: List[Dict[str, Any]]) -> None:
    """Print one record per article; absent fields are shown as ``None``."""
    for article in articles:
        article_title = article.get('title')
        article_link = article.get('link')
        article_authors = article.get('authors')
        article_publication = article.get('publication')
        citation = article.get('cited_by') or {}
        cited_by = citation.get('value')
        cited_by_link = citation.get('link')
        article_year = article.get('year')

        print('Articles Info:')
        print(f"Title: {article_title}\nLink: {article_link}\nAuthors: {article_authors}\nPublication: {article_publication}\nCited by: {cited_by}\nCited by link: {cited_by_link}\nPublication year: {article_year}\n")


def _print_citation_stats(cited_by: Dict[str, Any]) -> None:
    """Print citations, h-index and i10-index (all-time and recent)."""
    # Each table row is a one-key dict ("citations", "h_index", "i10_index");
    # merging them lets us look metrics up by name rather than by position.
    metrics: Dict[str, Any] = {}
    for row in cited_by.get('table') or []:
        metrics.update(row)

    citations_all, citations_recent = _all_and_recent(metrics.get('citations') or {})
    h_index_all, h_index_recent = _all_and_recent(metrics.get('h_index') or {})
    i10_index_all, i10_index_recent = _all_and_recent(metrics.get('i10_index') or {})

    print('Citations Info:')
    print(f'{citations_all}\n{citations_recent}\n{h_index_all}\n{h_index_recent}\n{i10_index_all}\n{i10_index_recent}\n')


def _print_public_access(public_access: Dict[str, Any]) -> None:
    """Print the public-access link and the number of available articles."""
    public_access_link = public_access.get('link')
    public_access_available_articles = public_access.get('available')

    print('Public Access Info:')
    print(f'{public_access_link}\n{public_access_available_articles}\n')


def _print_citation_graph(graph: List[Dict[str, Any]]) -> None:
    """Print one ``<year> <citations>`` line per point of the citation graph."""
    for point in graph:
        year = point.get('year')
        citations = point.get('citations')
        print(f'{year} {citations}\n')


def _print_co_authors(co_authors: List[Dict[str, Any]]) -> None:
    """Print name, affiliations and link of every co-author."""
    for co_author in co_authors:
        author_name = co_author.get('name')
        author_affiliations = co_author.get('affiliations')
        author_link = co_author.get('link')

        print('Co-Authour(s):')
        print(f'{author_name}\n{author_affiliations}\n{author_link}\n')


def serpapi_scrape_author_result_combo(profiles: Iterable[str]) -> None:
    """Fetch and print the ``google_scholar_author`` record of each author ID.

    For every ID one search is issued with the key from the ``API_KEY``
    environment variable, and the author info, articles, citation statistics,
    public-access info, citation graph and co-authors are printed in that
    order. Sections or fields missing from a response are printed as ``None``
    (or skipped, for the list-shaped sections) rather than raising.

    Args:
        profiles: Author IDs, e.g. the list returned by
            ``serpapi_scrape_profile_results_combo``. May be empty.
    """
    for author_id in profiles:
        params = {
            "api_key": os.getenv("API_KEY"),
            "engine": "google_scholar_author",
            "author_id": author_id,
            "hl": "en",
        }

        results = GoogleSearch(params).get_dict()

        author = results.get('author')
        if author is None:
            # NOTE(review): failures appear to be reported under a top-level
            # "error" key - confirm against the SerpApi docs. One failed
            # lookup should not stop the remaining authors.
            print(f"Skipping author {author_id}: {results.get('error')}", file=sys.stderr)
            continue

        cited_by = results.get('cited_by') or {}

        _print_author_info(author)
        _print_articles(results.get('articles') or [])
        _print_citation_stats(cited_by)
        _print_public_access(results.get('public_access') or {})
        _print_citation_graph(cited_by.get('graph') or [])
        _print_co_authors(results.get('co_authors') or [])
|
|
|
|
|
# Script flow: first gather the author IDs from the profile search, then
# fetch and print the full record of each of those authors.
profiles = serpapi_scrape_profile_results_combo()
serpapi_scrape_author_result_combo(profiles)