Last active
January 12, 2022 17:38
-
-
Save schwehr/22ce6080eb9e730ef04fccfa25072e3a to your computer and use it in GitHub Desktop.
Prototype DOI wrapper class
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2020 Google LLC. | |
# SPDX-License-Identifier: Apache-2.0 | |
"""Handles Digital Object Identifiers (DOI).
THIS IS ONLY A PROTOTYPE. | |
TODO(schwehr): Document the module. | |
https://en.wikipedia.org/wiki/Digital_object_identifier | |
https://www.doi.org/factsheets/DOIProxy.html | |
https://www.handle.net/ | |
https://wiki.osgeo.org/wiki/Persistent_identifiers(pid) | |
https://docs.github.com/en/repositories/archiving-a-github-repository/referencing-and-citing-content | |
10.1000/182 | |
10.1016/S0009-2614(97)04014-1 | |
10.1016/j.margeo.2007.01.012 | |
10.1029/2006GC001378 | |
10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2 | |
10.17487/RFC7669 | |
uri: | |
info:doi/10.1000/182 | |
info:doi/10.3334/ORNLDAAC/1328 | |
https://doi.org/10.3334/ORNLDAAC/1328 | |
https://doi.org/10.17487%2Frfc7669 | |
http://scitation.org/doi/10.1063/1.881498. <--- should this be handled? | |
http://dx.doi.org/10.5343/bms.2015.1034 | |
https://doi.org/10.1371%2Fjournal.pbio.0000057. <--- Does this code handle encoded chars? | |
DOI Registration Agency (RA) | |
""" | |
import enum | |
import re | |
from typing import Any, Dict, Optional | |
from urllib import parse | |
from absl import app | |
import requests | |
# TODO(schwehr): What is the correct regex for doi?
# https://github.com/radiantearth/stac-spec/issues/910
#
# Pattern for a bare DOI: the "10." directory indicator, a registrant code of
# four or more digits (e.g. 10.1000, 10.17487), optional dotted numeric
# sub-divisions of the registrant code, a slash, and a non-empty suffix.
# The pattern is meant to be used with re.match, so it is anchored at the start
# of the string only.
DOI_REGEX = r'10[.][0-9]{4,}([.][0-9]+)*/.+'

# Base of the DOI proxy URL; the (percent-encoded) DOI is appended to it.
DOI_URL_BASE = 'https://doi.org/'
def is_doi_valid(doi: str) -> bool:
  """Returns True if the string starts with something shaped like a bare DOI.

  The check is purely syntactic (DOI_REGEX applied with re.match); it does not
  contact any server to find out whether the DOI is registered.

  Args:
    doi: Candidate bare DOI, e.g. '10.1000/182'. URL and 'info:doi/' forms are
      not accepted here.

  Returns:
    True when the string matches DOI_REGEX, False otherwise.
  """
  # re.match returns a Match object or None; convert so that the function
  # really returns the bool its signature promises.
  return re.match(DOI_REGEX, doi) is not None
class Style(enum.Enum):
  """Citation styles that can be requested for a DOI.

  Each value is a Citation Style Language (CSL) style name. This is only a
  small selection; the rendering itself is done with citeproc-js.

  The full set of styles lives at
  https://github.com/citation-style-language/styles
  and a number of the names used here are renames listed in
  https://github.com/citation-style-language/styles/blob/master/renamed-styles.json

  Further reading:
    https://citation.crosscite.org/docs.html#sec-4-1
    https://pypi.org/project/citeproc-py/
  """

  APA = 'apa'
  BIBTEX = 'bibtex'
  CHICAGO = 'chicago-fullnote-bibliography'
  HARVARD = 'harvard-swinburne-university-of-technology'  # harvard3
  MLA = 'modern-language-association'
  VANCOUVER = 'vancouver'
class Doi:
  """Digital Object Identifier (DOI).

  Wraps a bare DOI string, kept in `self.doi` with the capitalization it was
  given in, and offers helpers that build the doi.org URL and query the doi.org
  web services through `requests`.

  Instances compare and hash case-insensitively on the DOI string.
  """

  # Seconds to wait for doi.org to answer. requests has no default timeout, so
  # without one a stalled connection would block the caller forever.
  _TIMEOUT_SECONDS = 30

  def __init__(self, doi: str):
    """Creates a Doi instance.

    For URLs, __init__ allows http or https for the scheme. The netloc can be
    either doi.org or dx.doi.org (compared case-insensitively). Percent-encoded
    characters in the URL path are decoded.

    These examples are equivalent:

      Doi('10.3334/ORNLDAAC/1328')
      Doi('info:doi/10.3334/ORNLDAAC/1328')
      Doi('https://doi.org/10.3334/ORNLDAAC/1328')
      Doi('http://doi.org/10.3334/ORNLDAAC/1328')
      Doi('https://dx.doi.org/10.3334/ORNLDAAC/1328')
      Doi('http://dx.doi.org/10.3334/ORNLDAAC/1328')

    The capitalization of the doi string is preserved, but comparisons are
    case-insensitive. e.g. these two Doi instances are equal, but will give
    URLs with different capitalization.

      Doi('10.3334/ORNLDAAC/1328') == Doi('10.3334/ornldaac/1328')

    Args:
      doi: A valid DOI, 'info:doi/' URI or DOI URL, e.g.
        '10.3334/ORNLDAAC/1328', 'info:doi/10.3334/ORNLDAAC/1328' or
        'https://doi.org/10.3334/ORNLDAAC/1328'.

    Raises:
      ValueError: If the string is not a bare DOI, an 'info:doi/' URI or an
        http(s) URL on doi.org / dx.doi.org that carries a valid DOI.
    """
    if is_doi_valid(doi):
      self.doi = doi
      return

    uri_start = 'info:doi/'
    if doi.startswith(uri_start):
      doi_str = doi[len(uri_start):]
    else:
      # Assume the doi string is in a DOI URL.
      urlsplit = parse.urlsplit(doi)
      if urlsplit.scheme not in ('http', 'https'):
        raise ValueError(
            f'Unsupported scheme "{urlsplit.scheme}" in DOI URL: "{doi}"')
      # Host names are case-insensitive, but urlsplit keeps netloc as written.
      if urlsplit.netloc.lower() not in ('doi.org', 'dx.doi.org'):
        raise ValueError(
            f'Unsupported host "{urlsplit.netloc}" in DOI URL: "{doi}"')
      doi_str = parse.unquote(urlsplit.path[1:])  # Drop leading '/'

    if not is_doi_valid(doi_str):
      raise ValueError('Not valid DOI or DOI URL: "%s"' % doi)
    self.doi = doi_str

  def url(self) -> str:
    """Returns the https://doi.org/ URL for the DOI, percent-encoded."""
    return DOI_URL_BASE + parse.quote(self.doi)

  def __repr__(self) -> str:
    return f'<Doi {self.doi}>'

  def __eq__(self, other: object) -> bool:
    """Compares with the other using a case-insensitive check on self.doi."""
    if not isinstance(other, Doi):
      # Let Python try the reflected comparison; `doi == 'text'` is still False.
      return NotImplemented
    return self.doi.lower() == other.doi.lower()

  def __hash__(self) -> int:
    """Hashes consistently with __eq__ so instances work in sets and dicts."""
    return hash(self.doi.lower())

  def _get_json(self, service: str) -> Any:
    """Fetches and decodes the JSON that a doi.org service returns for the DOI.

    Args:
      service: Path of the service below https://doi.org/, e.g. 'doiRA'.

    Returns:
      The decoded JSON document.
    """
    # Percent-encode the DOI: characters such as '#', '?' and '%' are legal in
    # a DOI suffix but would otherwise be read as URL syntax.
    url = f'{DOI_URL_BASE}{service}/{parse.quote(self.doi)}'
    return requests.get(url, timeout=self._TIMEOUT_SECONDS).json()

  def exists(self) -> bool:
    """Returns True if doi.org reports a Registration Agency for the DOI."""
    records = self._get_json('doiRA')
    return bool(records) and 'RA' in records[0]

  def ra(self) -> str:
    """Returns the name of the DOI's Registration Agency (RA).

    Raises:
      KeyError: If the doi.org answer has no 'RA' entry for the DOI.
    """
    return self._get_json('doiRA')[0]['RA']

  def handles(self) -> Dict[str, Any]:
    """Returns the handle record that the doi.org handles API gives the DOI."""
    return self._get_json('api/handles')

  def citation(self, style: Optional[Style] = Style.APA) -> str:
    """Returns the citation string for a doi.

    Args:
      style: Citation style to request. None is treated as Style.APA.

    Returns:
      The formatted citation text returned by content negotiation on the DOI
      URL.
    """
    if style is None:
      style = Style.APA
    headers = {'Accept': f'text/x-bibliography; style={style.value}'}
    response = requests.get(
        self.url(), headers=headers, timeout=self._TIMEOUT_SECONDS)
    # When a text/* response declares no charset, requests falls back to
    # ISO-8859-1, which turns UTF-8 punctuation such as curly quotes into
    # mojibake. Decode as UTF-8 unless the server says otherwise.
    if 'charset' not in response.headers.get('Content-Type', '').lower():
      response.encoding = 'utf-8'
    return response.text
def print_info(doi_str: str, have_internet: Optional[bool] = False) -> None:
  """Prints to stdout information about a DOI.

  The parsed DOI and its URL are always printed. The remaining details need
  network access and are only printed when have_internet is true and the DOI
  turns out to exist.

  Args:
    doi_str: A DOI, 'info:doi/' URI or DOI URL accepted by Doi().
    have_internet: Whether the doi.org services may be queried.
  """
  doi = Doi(doi_str)
  print(f'Info for "{doi_str}":', doi)
  print(' url:', doi.url())

  if not have_internet:
    return

  # Ask the server once and reuse the answer for both the printed line and the
  # early exit below.
  exists = doi.exists()
  print(' exists:', 'yes' if exists else 'no')
  if not exists:
    return

  print(' ra:', doi.ra())
  print(' handles:', doi.handles())
  # Citations can be long; show only the first 140 characters of each.
  print('citation:', doi.citation()[:140])
  print('citation:', doi.citation(Style.MLA)[:140])
  print('citation:', doi.citation(Style.BIBTEX)[:140])
def main(argv):
  """Prints information about every DOI given on the command line.

  Args:
    argv: Command-line arguments. argv[0] is the program name; every following
      entry is passed to print_info with network lookups enabled.

  Raises:
    app.UsageError: If no DOI argument was given.
  """
  doi_args = argv[1:]
  if not doi_args:
    raise app.UsageError('Too few command-line arguments.')

  print()
  for doi_arg in doi_args:
    print_info(doi_arg, have_internet=True)
    print()


if __name__ == '__main__':
  app.run(main)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
doi '10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2' 'https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2' | |
Info for "10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2": <Doi 10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2> | |
url: https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2 | |
exists: yes | |
ra: Crossref | |
handles: {'responseCode': 1, 'handle': '10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2', 'values': [{'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://pubs.geoscienceworld.org/geology/article/31/3/203-206/197720'}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}, {'index': 700050, 'type': '700050', 'data': {'format': 'string', 'value': '200701311441580000'}, 'ttl': 86400, 'timestamp': '2018-03-08T20:12:00Z'}, {'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '0.na/10.1130', 'index': 200, 'permissions': '111111110010'}}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}]} | |
citation: Schwehr, K., & Tauxe, L. (2003). Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods. Geology, 3 | |
citation: Schwehr, Kurt, and Lisa Tauxe. âCharacterization of Soft-Sediment Deformation: Detection of Cryptoslumps Using Magnetic Methods.â Geolog | |
citation: @article{Schwehr_2003, title={Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods}, volume={31} | |
Info for "https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2": <Doi 10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2> | |
url: https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2 | |
exists: yes | |
ra: Crossref | |
handles: {'responseCode': 1, 'handle': '10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2', 'values': [{'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://pubs.geoscienceworld.org/geology/article/31/3/203-206/197720'}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}, {'index': 700050, 'type': '700050', 'data': {'format': 'string', 'value': '200701311441580000'}, 'ttl': 86400, 'timestamp': '2018-03-08T20:12:00Z'}, {'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '0.na/10.1130', 'index': 200, 'permissions': '111111110010'}}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}]} | |
citation: Schwehr, K., & Tauxe, L. (2003). Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods. Geology, 3 | |
citation: Schwehr, Kurt, and Lisa Tauxe. âCharacterization of Soft-Sediment Deformation: Detection of Cryptoslumps Using Magnetic Methods.â Geolog | |
citation: @article{Schwehr_2003, title={Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods}, volume={31} | |
doi info:doi/10.3334/ORNLDAAC/1328 https://doi.org/10.3334/ORNLDAAC/1328 http://dx.doi.org/10.3334/ornldaac/1328 10.3334/ornldaac/1328/DOES_NOT_EXIST | |
Info for "info:doi/10.3334/ORNLDAAC/1328": <Doi 10.3334/ORNLDAAC/1328> | |
url: https://doi.org/10.3334/ORNLDAAC/1328 | |
exists: yes | |
ra: DataCite | |
handles: {'responseCode': 1, 'handle': '10.3334/ORNLDAAC/1328', 'values': [{'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '10.admin/codata', 'index': 300, 'permissions': '111111111111'}}, 'ttl': 86400, 'timestamp': '2019-10-09T10:30:52Z'}, {'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1328'}, 'ttl': 86400, 'timestamp': '2017-04-19T17:18:12Z'}]} | |
citation: Thornton, P. E., Thornton, M. M., Mayer, B. W., Wei, Y., Devarakonda, R., Vose, R. S., & Cook, R. B. (2016). <i>Daymet: Daily Surface We | |
citation: Thornton, P. E., et al. <i>Daymet: Daily Surface Weather Data on a 1-Km Grid for North America, Version 3</i>. 3.4, ORNL Distributed Active | |
citation: @article{thornton_thornton_mayer_wei_devarakonda_vose_cook_2016, title={Daymet: Daily Surface Weather Data on a 1-km Grid for North America | |
Info for "https://doi.org/10.3334/ORNLDAAC/1328": <Doi 10.3334/ORNLDAAC/1328> | |
url: https://doi.org/10.3334/ORNLDAAC/1328 | |
exists: yes | |
ra: DataCite | |
handles: {'responseCode': 1, 'handle': '10.3334/ORNLDAAC/1328', 'values': [{'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '10.admin/codata', 'index': 300, 'permissions': '111111111111'}}, 'ttl': 86400, 'timestamp': '2019-10-09T10:30:52Z'}, {'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1328'}, 'ttl': 86400, 'timestamp': '2017-04-19T17:18:12Z'}]} | |
citation: Thornton, P. E., Thornton, M. M., Mayer, B. W., Wei, Y., Devarakonda, R., Vose, R. S., & Cook, R. B. (2016). <i>Daymet: Daily Surface We | |
citation: Thornton, P. E., et al. <i>Daymet: Daily Surface Weather Data on a 1-Km Grid for North America, Version 3</i>. 3.4, ORNL Distributed Active | |
citation: @article{thornton_thornton_mayer_wei_devarakonda_vose_cook_2016, title={Daymet: Daily Surface Weather Data on a 1-km Grid for North America |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment