Last active
August 10, 2022 13:37
-
-
Save J535D165/7612d7138bc72c37ef03c9e2d73e6466 to your computer and use it in GitHub Desktop.
Get the publisher (or any variables) from a DOI via DataCite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.etree.ElementTree as ET | |
import requests | |
# curl -LH "Accept: application/vnd.datacite.datacite+xml" https://doi.org/10.34894/KEIQRD
def get_datapublisher_from_doi(doi, timeout=10):
    """Get the publisher from the DOI.

    The DOI is resolved through https://doi.org with an ``Accept`` header
    requesting DataCite XML. The text of the first top-level ``publisher``
    element of the response is returned.

    Arguments
    ---------
    doi: str
        The DOI to find the publisher for.
    timeout: float or tuple, optional
        Timeout in seconds handed to ``requests.get`` so a stalled server
        cannot block the caller forever. Defaults to 10.

    Returns
    -------
    str or None:
        The publisher, or None when the metadata has no top-level
        publisher element.

    Raises
    ------
    ValueError
        If the final response status is not 200.

    Exceptions raised by ``requests`` (e.g. on timeout) or by the XML
    parser are not caught here and propagate to the caller.
    """
    r = requests.get(
        f"https://doi.org/{doi}",
        headers={"Accept": "application/vnd.datacite.datacite+xml"},
        allow_redirects=True,
        timeout=timeout,
    )

    if r.status_code != 200:
        raise ValueError(
            f"DOI not found: {doi!r} (HTTP status {r.status_code})"
        )

    tree = ET.fromstring(r.content)

    # Match on the local tag name instead of a fixed "kernel-4" namespace so
    # that metadata served in another DataCite kernel version still works.
    for child in tree:
        tag = child.tag
        if isinstance(tag, str) and tag.rpartition("}")[2] == "publisher":
            return child.text

    return None
def test_datapublisher_retrieval():
    """Check that DOI 10.34894/KEIQRD yields the publisher "DataverseNL"."""
    doi = "10.34894/KEIQRD"
    expected_publisher = "DataverseNL"

    publisher = get_datapublisher_from_doi(doi)

    assert publisher == expected_publisher
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment