Skip to content

Instantly share code, notes, and snippets.

@ryanmuller
Created December 4, 2020 19:29
Show Gist options
  • Save ryanmuller/2f8b9754b72bbf98bd70f0c974243ecd to your computer and use it in GitHub Desktop.
Save ryanmuller/2f8b9754b72bbf98bd70f0c974243ecd to your computer and use it in GitHub Desktop.
Scrape Google Scholar citation data for Roam as a Chrome extension
/**
 * Converts the author portion of a Chicago-style citation into display names.
 *
 * The portion is split on ', '. The first two pieces are treated as
 * "Last, First" and swapped to "First Last"; every later piece is kept as
 * written, with a leading "and " removed from the final one.
 *
 * @param {string} authorField - Text that precedes the quoted title.
 * @returns {string[]} One entry per author, in citation order.
 */
function formatChicagoAuthors(authorField) {
  const parts = authorField.split(', ');
  const lastIndex = parts.length - 1;
  // Anchored on purpose: an unanchored replace would also eat an "and " that
  // is part of a name (e.g. "Roland Pierre" would become "RolPierre").
  parts[lastIndex] = parts[lastIndex].replace(/^and /, '');
  if (parts.length < 2) {
    // No "Last, First" pair to swap (e.g. an organisation name).
    return parts[0] === '' ? [] : parts;
  }
  return [parts[1] + ' ' + parts[0]].concat(parts.slice(2));
}

/**
 * Extracts the fields of a Chicago-style citation string.
 *
 * The pattern expects: authors, '. "', the title, '." ', an optional "In ",
 * the publication name (everything up to the first comma), and later a
 * standalone four-digit number that is taken as the year.
 *
 * @param {string} citationText - Full citation text.
 * @returns {?{title: string, authors: string[], year: string, journal: string}}
 *   The parsed fields, or null when the text does not fit the pattern.
 */
function parseChicagoCitation(citationText) {
  // TODO: improve year match (the last standalone 4-digit number wins)
  const matches = citationText.match(/^(.*)\.\ "(.*)\."\ (?:In\ )?([^,]*).*\b(\d\d\d\d)\b/);
  if (!matches) {
    return null;
  }
  return {
    title: matches[2],
    authors: formatChicagoAuthors(matches[1]),
    year: matches[4],
    journal: matches[3],
  };
}

/**
 * Looks up the text of the row labelled "Chicago" inside the element with
 * id "gs_citt".
 *
 * @returns {?string} The citation text, or null when the element, the row or
 *   its cell is not present in the page.
 */
function findChicagoCitationText() {
  const citationTable = document.getElementById('gs_citt');
  if (!citationTable) {
    return null;
  }
  const citationRows = citationTable.getElementsByTagName('tr');
  for (let i = 0; i < citationRows.length; i++) {
    const header = citationRows[i].getElementsByTagName('th')[0];
    if (!header || header.textContent !== 'Chicago') {
      continue;
    }
    const cells = citationRows[i].getElementsByTagName('td');
    // Second cell as before; fall back to the only cell when the row has one.
    // NOTE(review): confirm which cell holds the text in the current markup.
    const cell = cells[1] || cells[0];
    if (cell) {
      return cell.textContent;
    }
  }
  return null;
}

/**
 * Returns the href of the first anchor inside the first element with class
 * "gs_or_ggsm".
 *
 * @returns {(string|undefined)} The link target, or undefined when there is
 *   no such element or it contains no anchor.
 */
function findResultLinkUrl() {
  const links = document.getElementsByClassName('gs_or_ggsm');
  if (!links.length) {
    return undefined;
  }
  const anchor = links[0].getElementsByTagName('a')[0];
  return anchor ? anchor.href : undefined;
}

/**
 * Answers any incoming extension message with the citation data found in the
 * page. The callback is always invoked: when nothing can be extracted the
 * reply carries an empty authors list and undefined for the other fields
 * instead of the listener throwing.
 */
chrome.runtime.onMessage.addListener(function(msg, sender, callback) {
  const citationText = findChicagoCitationText();
  const parsed = citationText === null ? null : parseChicagoCitation(citationText);
  const citation = parsed || { title: undefined, authors: [], year: undefined, journal: undefined };
  // alternative option for title: document.getElementsByTagName('h3')[0].textContent;
  callback({
    title: citation.title,
    authors: citation.authors,
    year: citation.year,
    journal: citation.journal,
    url: findResultLinkUrl(),
  });
});
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Google Scholar citation for Roam</title>
</head>
<body>
<!-- Output fields; popup.js looks both up by id and fills in their values. -->
<label for="title">title</label>
<textarea id="title" cols="80"></textarea>
<label for="meta">meta</label>
<textarea id="meta" cols="80" rows="3"></textarea>
<!-- Loaded last so the textareas above already exist when the script runs. -->
<script src="popup.js"></script>
</body>
</html>
// Output fields of the popup; both are filled once the active tab answers.
const titleOut = document.getElementById('title');
const metaOut = document.getElementById('meta');

/**
 * Renders a possibly missing value as text.
 *
 * @param {*} value - Field taken from the tab's reply.
 * @returns {string} '' for undefined/null, otherwise the value as a string.
 */
function textOrEmpty(value) {
  return value === undefined || value === null ? '' : String(value);
}

/**
 * Writes the citation into the two output fields: the title wrapped in
 * [[...]] and a four-line bullet list with authors, year, publication and URL.
 * Missing fields are left blank instead of printing "undefined".
 *
 * @param {{title: string=, authors: string[]=, year: string=, journal: string=, url: string=}} data
 *   Reply received from the tab.
 */
function showCitation(data) {
  const authors = Array.isArray(data.authors) ? data.authors : [];
  const authorStr = authors.map(a => `[[${a}]]`).join(', ');
  const title = textOrEmpty(data.title);
  titleOut.value = title === '' ? '' : `[[${title}]]`;
  metaOut.value = [
    `* **Authors:** ${authorStr}`,
    `* **Year:** ${textOrEmpty(data.year)}`,
    `* **Publication:** ${textOrEmpty(data.journal)}`,
    `* **URL:** ${textOrEmpty(data.url)}`,
  ].join('\n');
}

/** Clears the title and explains in the meta field that nothing was received. */
function showUnavailable() {
  titleOut.value = '';
  metaOut.value = 'No citation data received from the current tab.';
}

// Ask the active tab of the current window for its citation data.
chrome.tabs.query({active: true, currentWindow: true}, function(tabs) {
  if (!tabs || tabs.length === 0) {
    showUnavailable();
    return;
  }
  chrome.tabs.sendMessage(tabs[0].id, {}, function(data) {
    // lastError is set (and data is undefined) when no listener in the tab
    // answered; reading it here also marks the error as handled.
    if (chrome.runtime.lastError || !data) {
      showUnavailable();
      return;
    }
    showCitation(data);
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment