Skip to content

Instantly share code, notes, and snippets.

@mdeweerdt
Forked from avandeursen/pure2bibtex.py
Created June 19, 2021 08:53
Show Gist options
  • Save mdeweerdt/476a438a879fe9448d83cf8fb976ddb3 to your computer and use it in GitHub Desktop.
Save mdeweerdt/476a438a879fe9448d83cf8fb976ddb3 to your computer and use it in GitHub Desktop.
Turn a publication list in an Elsevier Pure web page into a list of bibtex entries with urls back to Pure.
import sys
import requests
from bs4 import BeautifulSoup
# Set to the PURE page you'd like to get the bibtex entries from. Here's mine:
pure = "https://pure.tudelft.nl/portal/en/persons/a-van-deursen(949eb2cc-4db0-4f33-bd56-13425fa5c24a)/publications.html?page=0"
class BibTexEntry:
def __init__(self, entry):
self.entry = entry
def withField(self, field):
start = self.entry.rstrip("}")
self.entry = start + " " + field + ",\n}"
return self
def withURL(self, url):
return self.withField("url = \"" + url + "\"")
def withGroup(self, group):
return self.withField("group = \"" + group + "\"")
def withCode(self):
return self.withField("code = {I-ISS-07}")
def inProceedings(self):
self.entry = self.entry.replace('@inbook{', '@inproceedings{')
return self
def process(self, url):
return (
self
.withURL(url)
.withGroup("SE")
.withCode()
.inProceedings()
.entry
)
def soupPage(url):
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
return soup
def titleLinks(researchOutput) :
anchors = researchOutput.select("div.rendering_researchoutput h3.title a.link")
links = list(map(lambda a: a['href'], anchors))
return links
def exportPage(titleLink) :
export = titleLink.replace('.html', '/export.html')
return soupPage(export)
def bibtex(page) :
bibdiv = page.select("div.rendering_bibtex")
return bibdiv[0].text
def getBibs(paperListURL) :
links = titleLinks(soupPage(paperListURL))
for paperURL in links :
bib = bibtex(exportPage(paperURL))
print(BibTexEntry(bib).process(paperURL))
getBibs(pure)
# Copyright 2017 Arie van Deursen
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment