Skip to content

Instantly share code, notes, and snippets.

@napsternxg
Last active July 8, 2021 05:05
Show Gist options
  • Save napsternxg/95dff90ac559caca9c89b7ac6566fc9e to your computer and use it in GitHub Desktop.
Save napsternxg/95dff90ac559caca9c89b7ac6566fc9e to your computer and use it in GitHub Desktop.
Google Scholar Info Extractor
/**
 * Reads the citation statistics from the current document.
 *
 * Looks up `table#gsc_rsb_st` for the summary table and `div.gsc_md_hist_b`
 * for the per-year history, both via the global `document`.
 *
 * NOTE(review): `years` and `citations` are collected from two independent
 * selectors; if the page omits an `a.gsc_g_a` element for some year the two
 * arrays will not line up index-for-index — confirm against the live markup.
 *
 * @returns {{overall: string[][], temporal: {years: number[], citations: number[]}}}
 *   `overall` holds one array of cell texts per table row; `temporal` holds
 *   the numeric years and citation counts found in the history element.
 * @throws {TypeError} If either the table or the history element is missing
 *   (the `querySelector` result is dereferenced without a null check).
 */
function getCitationInfo() {
  const statsTable = document.querySelector("table#gsc_rsb_st");
  const citeHist = document.querySelector("div.gsc_md_hist_b");

  const overall = Array.from(statsTable.querySelectorAll("tr")).map((tr, i) => {
    // The first row is read through its <th> cells, every other row through <td>.
    const selector = i === 0 ? "th" : "td";
    return Array.from(tr.querySelectorAll(selector)).map((cell) => cell.innerText);
  });

  const temporal = {
    years: Array.from(citeHist.querySelectorAll("span.gsc_g_t")).map((s) => Number(s.innerText)),
    citations: Array.from(citeHist.querySelectorAll("a.gsc_g_a")).map((a) => Number(a.text)),
  };

  return {
    overall,
    temporal,
  };
}
/**
 * Extracts the fields of a single publication row.
 *
 * @param {Element} p - A row element containing the `td.gsc_a_t`, `td.gsc_a_c`
 *   and `td.gsc_a_y` cells that are queried below.
 * @returns {{url: (string|null), title: string, citations: number, year: number,
 *   authors: string[], venue: string}} `url` is the link's `data-href`
 *   attribute (null when the attribute is absent); `citations` and `year` are
 *   the cell texts converted with `Number`, so an empty cell yields 0 and
 *   non-numeric text yields NaN; `authors` is the first `div.gs_gray` text
 *   split on ", "; `venue` is the text of the `div.gs_gray:last-child`.
 * @throws {TypeError} If any of the queried elements is missing from the row.
 */
function getPaperInfo(p) {
  const link = p.querySelector("td.gsc_a_t a");
  const url = link.getAttribute("data-href");
  const title = link.text;
  const citations = Number(p.querySelector("td.gsc_a_c a").text);
  const year = Number(p.querySelector("td.gsc_a_y").innerText);
  const authors = p.querySelector("td.gsc_a_t div.gs_gray").innerText.split(", ");
  const venue = p.querySelector("td.gsc_a_t div.gs_gray:last-child").innerText;

  return {
    url,
    title,
    citations,
    year,
    authors,
    venue,
  };
}
/**
 * Collects the profile information from the current document.
 *
 * Reads the publication rows (`#gsc_a_tw tr.gsc_a_tr`), the profile image
 * (`img#gsc_prf_pup-img`), the display name (`div#gsc_prf_in`) and the first
 * anchor inside `div.gsc_prf_il`, then adds the result of `getCitationInfo()`.
 *
 * All intermediate values are local; in particular `name` is declared here so
 * that it does not overwrite the browser's global `window.name`.
 *
 * @returns {{name: string, img: string,
 *   affiliation: {url: (string|null), name: (string|null)},
 *   citations: Object, papers: Object[]}} `papers` holds one
 *   `getPaperInfo` result per publication row; `citations` is whatever
 *   `getCitationInfo()` returns. When no affiliation anchor is found, both
 *   `affiliation.url` and `affiliation.name` are null.
 * @throws {TypeError} If the profile image or the name element is missing.
 */
function getAllInfo() {
  const paperRows = Array.from(document.querySelectorAll("#gsc_a_tw tr.gsc_a_tr"));
  const papers = paperRows.map((row) => getPaperInfo(row));

  const img = document.querySelector("img#gsc_prf_pup-img").src;
  const name = document.querySelector("div#gsc_prf_in").innerText;

  // The anchor may be absent; report nulls instead of throwing in that case.
  const affiliationLink = document.querySelector("div.gsc_prf_il a");
  const affiliation = {
    url: affiliationLink ? affiliationLink.getAttribute("href") : null,
    name: affiliationLink ? affiliationLink.text : null,
  };

  const citations = getCitationInfo();

  return {
    name,
    img,
    affiliation,
    citations,
    papers,
  };
}
// Run the extraction against the current page.
// NOTE(review): presumably meant to be pasted into the browser devtools
// console, where the value of this call is displayed — confirm intended usage.
getAllInfo();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment