Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Multi-import of Google Scholar BibTeX references
For an answer to:
This is rate-limited / prohibited by Google
/**
 * Performs an HTTP GET request with cURL and returns the response body.
 *
 * @param string $url Address to request.
 *
 * @return string|bool The response body, or false when curl_exec() fails.
 */
function get_page($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
    // Hand the body back as a string instead of echoing it to the output.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    return curl_exec($ch);
}
// helper function
/**
 * Downloads a page and wraps its parsed HTML in a DOMXPath object.
 *
 * When the download fails or yields an empty body, the returned DOMXPath is
 * bound to an empty document, so queries against it simply match nothing.
 *
 * @param string $query_url Address of the page to fetch and parse.
 *
 * @return DOMXPath XPath evaluator over the parsed page.
 */
function get_xpath($query_url) {
    $dom = new DOMDocument();
    // "User behavior"
    // NOTE(review): the comment above is kept from the original; its intent
    // is not visible here (possibly a delay between requests) - confirm.
    $html = get_page($query_url);

    // Only parse a real, non-blank body; a failed fetch yields false.
    if (is_string($html) && trim($html) !== '') {
        // Real-world HTML is rarely well-formed: collect parser complaints
        // internally instead of emitting a warning for each one.
        $previous = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    return new DOMXpath($dom);
}
// Loads google scholar and returns the reference ID's that need to be clicked
/**
 * Collects the onclick attributes of every <a class="gs_nph"> link on a page.
 *
 * @param string $query_url Address of the search result page.
 *
 * @return DOMNodeList|false Matching onclick attribute nodes, as returned by
 *                           DOMXPath::query().
 */
function get_reference_nodes($query_url) {
    $xpath = get_xpath($query_url);

    return $xpath->query('//body//a[@class="gs_nph"]/@onclick');
}
/**
 * Extracts the reference ID from a "return gs_ocit(event,'ID','N')" handler.
 *
 * @param string $href Text of the onclick attribute to inspect.
 *
 * @return string|null The first quoted argument (letters, digits, "_" and
 *                     "-"), or null when the text does not match.
 */
function get_reference_id($href) {
    $matched = preg_match(
        "/return gs_ocit\(event,'([a-zA-Z0-9_\-]*)','[0-9]*'\)/",
        (string) $href,
        $tmp
    );

    // preg_match() yields 1 only on a successful match; 0 and false both
    // mean there is no ID to report.
    return $matched === 1 ? $tmp[1] : null;
}
// Input : List of google scholar links
// Output : List of reference IDs
/**
 * Maps each attribute node to the reference ID found in its value.
 *
 * @param iterable $nodes Nodes exposing a string `value` property.
 *
 * @return array One entry per node, in order; an entry is null when
 *               get_reference_id() finds no ID in that node's value.
 */
function extract_reference_ids($nodes) {
    $links = array();
    foreach ($nodes as $node) {
        $links[] = get_reference_id($node->value);
    }

    return $links;
}
// Get a bibID based on initial cite link
/**
 * Loads the page for a reference ID and selects the href attribute of the
 * first <a class="gs_citi"> link under each parent.
 *
 * @param string $ref_id Reference ID used to build the page address.
 *
 * @return DOMNodeList|false Matching href attribute nodes, as returned by
 *                           DOMXPath::query().
 */
function get_bibtex_id($ref_id) {
    // NOTE(review): the address consists of the ID alone; a URL prefix
    // appears to be missing here - confirm against the intended endpoint.
    $query_url = "{$ref_id}";
    $xpath = get_xpath($query_url);

    // Return the first link (the bibTex import one)
    return $xpath->query('//body//a[@class="gs_citi"][1]/@href');
}
/**
 * Downloads the BibTeX export behind each reference ID.
 *
 * IDs that are null, and IDs whose page exposes no export link, are skipped
 * rather than dereferencing a missing node.
 *
 * @param array $ref_ids Reference IDs; entries may be null.
 *
 * @return array Whatever get_page() returned for each export link that was
 *               found, in input order.
 */
function get_bibtex_imports($ref_ids) {
    $results = array();
    foreach ($ref_ids as $id) {
        if ($id === null) {
            continue;
        }

        $tmp = get_bibtex_id($id);
        if (!$tmp instanceof DOMNodeList) {
            continue;
        }

        // BibTex entry is first link on the page
        $first = $tmp->item(0);
        if ($first === null) {
            continue;
        }

        // NOTE(review): the empty prefix looks like a placeholder for a
        // missing base URL - confirm.
        $link = '' . $first->value;

        // Get the bibtex entry
        $results[] = get_page($link);
    }

    return $results;
}
// Initial page request
$query = "Virtualization"; // example query
// NOTE(review): the address is just the bare search term; a URL prefix
// appears to be missing here - confirm against the intended endpoint.
$query_url = "{$query}";
// Get the bibtex entries
// Step 1: collect the onclick attribute nodes from the result page.
$nodes = get_reference_nodes($query_url);
// Step 2: turn each onclick handler into a reference ID.
$ref_ids = extract_reference_ids($nodes);
// Step 3: fetch the export behind each ID. Despite the name, $bib_ids holds
// the values returned by get_page(), not IDs.
$bib_ids = get_bibtex_imports($ref_ids);
// List of all bibtex imports, could export them to a file or whatever.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.