Skip to content

Instantly share code, notes, and snippets.

@schwehr
Last active January 12, 2022 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save schwehr/22ce6080eb9e730ef04fccfa25072e3a to your computer and use it in GitHub Desktop.
Save schwehr/22ce6080eb9e730ef04fccfa25072e3a to your computer and use it in GitHub Desktop.
Prototype DOI wrapper class
# Copyright 2020 Google LLC.
# SPDX-License-Identifier: Apache-2.0
"""Handles Digital Object Identifiers (DOI).
THIS IS ONLY A PROTOTYPE.
TODO(schwehr): Document the module.
https://en.wikipedia.org/wiki/Digital_object_identifier
https://www.doi.org/factsheets/DOIProxy.html
https://www.handle.net/
https://wiki.osgeo.org/wiki/Persistent_identifiers(pid)
https://docs.github.com/en/repositories/archiving-a-github-repository/referencing-and-citing-content
10.1000/182
10.1016/S0009-2614(97)04014-1
10.1016/j.margeo.2007.01.012
10.1029/2006GC001378
10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2
10.17487/RFC7669
uri:
info:doi/10.1000/182
info:doi/10.3334/ORNLDAAC/1328
https://doi.org/10.3334/ORNLDAAC/1328
https://doi.org/10.17487%2Frfc7669
http://scitation.org/doi/10.1063/1.881498. <--- should this be handled?
http://dx.doi.org/10.5343/bms.2015.1034
https://doi.org/10.1371%2Fjournal.pbio.0000057. <--- Does this code handle encoded chars?
DOI Registration Agency (RA)
"""
import enum
import re
from typing import Any, Dict, Optional
from urllib import parse
from absl import app
import requests
# TODO(schwehr): What is the correct regex for doi?
# https://github.com/radiantearth/stac-spec/issues/910
#
# The registrant code after "10." must allow MORE than four digits: the
# example DOI 10.17487/RFC7669 has a five digit registrant code and would be
# rejected by a fixed-width `{4}`.
DOI_REGEX = r'10[.][0-9]{4,}([.][0-9]+)*/.+'
DOI_URL_BASE = 'https://doi.org/'


def is_doi_valid(doi: str) -> bool:
    """Returns True if `doi` looks like a bare DOI such as '10.1000/182'.

    Only the bare form is accepted here; 'info:doi/...' URIs and
    'https://doi.org/...' URLs do not match. The check is anchored at the
    start of the string only, so any non-empty suffix after the first '/'
    is accepted.

    Args:
      doi: The candidate string.

    Returns:
      True when `doi` matches DOI_REGEX, otherwise False.
    """
    # re.match() returns a Match object or None; convert it so the function
    # really returns the bool promised by its annotation.
    return re.match(DOI_REGEX, doi) is not None
class Style(enum.Enum):
    """Citation formats that can be requested for a DOI (partial list).

    Each member's value is the style name sent to the citation service.
    Formatting is performed by citeproc-js.

    Full catalogue of styles:
      https://github.com/citation-style-language/styles

    A number of the names below are renames, see:
      https://github.com/citation-style-language/styles/blob/master/renamed-styles.json

    Further reading:
      https://citation.crosscite.org/docs.html#sec-4-1
      https://pypi.org/project/citeproc-py/
    """

    APA = 'apa'
    BIBTEX = 'bibtex'
    CHICAGO = 'chicago-fullnote-bibliography'
    HARVARD = 'harvard-swinburne-university-of-technology'  # harvard3
    MLA = 'modern-language-association'
    VANCOUVER = 'vancouver'
class Doi:
    """Digital Object Identifier (DOI).

    Holds one DOI string in `self.doi`. The capitalization given by the
    caller is preserved, while equality and hashing ignore case.
    """

    def __init__(self, doi: str):
        """Creates a Doi instance.

        For URLs, __init__ allows http or https for the scheme. The netloc
        can be either doi.org or dx.doi.org (compared case-insensitively).
        The URL path is percent-decoded before validation.

        These examples are equivalent:

          Doi('10.3334/ORNLDAAC/1328')
          Doi('info:doi/10.3334/ORNLDAAC/1328')
          Doi('https://doi.org/10.3334/ORNLDAAC/1328')
          Doi('http://doi.org/10.3334/ORNLDAAC/1328')
          Doi('https://dx.doi.org/10.3334/ORNLDAAC/1328')
          Doi('http://dx.doi.org/10.3334/ORNLDAAC/1328')

        The capitalization of the doi string is preserved, but comparisons
        are case-insensitive. e.g. these two Doi instances are equal, but
        will give URLs with different capitalization.

          Doi('10.3334/ORNLDAAC/1328') == Doi('10.3334/ornldaac/1328')

        Args:
          doi: A bare DOI (e.g. '10.3334/ORNLDAAC/1328'), an 'info:doi/' URI,
            or a DOI URL (e.g. 'https://doi.org/10.3334/ORNLDAAC/1328').

        Raises:
          ValueError: If `doi` is not a valid DOI, info:doi URI or DOI URL.
            ValueError is a subclass of Exception, so callers that catch
            Exception keep working.
        """
        # Fast path: already a bare DOI.
        if is_doi_valid(doi):
            self.doi = doi
            return

        uri_start = 'info:doi/'
        if doi.startswith(uri_start):
            doi_str = doi[len(uri_start):]
        else:
            # Assume the doi string is a DOI URL.
            urlsplit = parse.urlsplit(doi)
            if urlsplit.scheme not in ('http', 'https'):
                raise ValueError(
                    f'Unsupported scheme for DOI URL: "{urlsplit.scheme}"')
            # Host names are case-insensitive, so normalize before comparing.
            if urlsplit.netloc.lower() not in ('doi.org', 'dx.doi.org'):
                raise ValueError(f'Invalid host for doi: {urlsplit.netloc}')
            # Drop the leading '/' and decode percent-escapes such as %2F.
            doi_str = parse.unquote(urlsplit.path[1:])

        if not is_doi_valid(doi_str):
            raise ValueError(f'Not valid DOI or DOI URL: "{doi}"')
        self.doi = doi_str

    def _quoted(self) -> str:
        """Returns self.doi percent-encoded for use in a URL path."""
        return parse.quote(self.doi)

    def url(self) -> str:
        """Returns the https://doi.org/ URL for this DOI, percent-encoded."""
        return DOI_URL_BASE + self._quoted()

    def __repr__(self) -> str:
        return f'<Doi {self.doi}>'

    def __eq__(self, other: object) -> bool:
        """Compares with the other using case-insensitive check on self.doi."""
        if not isinstance(other, Doi):
            # Let Python try the reflected comparison; `Doi(...) == 'x'` still
            # evaluates to False.
            return NotImplemented
        return self.doi.lower() == other.doi.lower()

    def __hash__(self) -> int:
        """Hashes consistently with __eq__ (case-insensitive on self.doi).

        Defining __eq__ alone would make instances unhashable, which prevents
        using them in sets or as dict keys.
        """
        return hash(self.doi.lower())

    def exists(self) -> bool:
        """Returns True if the doi.org doiRA lookup reports an 'RA' entry.

        Performs an HTTP request.
        """
        # Quote the DOI so characters such as '#', '?' or '<' cannot change
        # the meaning of the request URL.
        result = requests.get(f'https://doi.org/doiRA/{self._quoted()}')
        return 'RA' in result.json()[0]

    def ra(self) -> str:
        """Returns the 'RA' (Registration Agency) value from the doiRA lookup.

        Performs an HTTP request.

        Raises:
          KeyError: If the response has no 'RA' entry; see exists().
        """
        result = requests.get(f'https://doi.org/doiRA/{self._quoted()}')
        return result.json()[0]['RA']

    def handles(self) -> Dict[str, Any]:
        """Returns the decoded JSON from the doi.org api/handles endpoint.

        Performs an HTTP request.
        """
        result = requests.get(
            f'https://doi.org/api/handles/{self._quoted()}')
        return result.json()

    def citation(self, style: Optional[Style] = Style.APA) -> str:
        """Returns the citation string for a doi.

        Performs an HTTP request against self.url() asking for a formatted
        bibliography entry.

        Args:
          style: The citation style to request. None is treated as Style.APA.

        Returns:
          The response body text.
        """
        # The annotation permits None, so fall back to the default style
        # instead of failing on `None.value`.
        if style is None:
            style = Style.APA
        headers = {'Accept': f'text/x-bibliography; style={style.value}'}
        return requests.get(self.url(), headers=headers).text
def print_info(doi_str: str, have_internet: Optional[bool] = False) -> None:
    """Prints to stdout information about a DOI.

    Always prints the parsed Doi and its URL. When `have_internet` is true,
    also queries the network for existence, registration agency, handle
    record and the first 140 characters of three citation styles.

    Args:
      doi_str: A DOI, info:doi URI or DOI URL accepted by Doi().
      have_internet: Whether network lookups may be performed.
    """
    doi = Doi(doi_str)
    print(f'Info for "{doi_str}":', doi)
    print(' url:', doi.url())
    if not have_internet:
        return

    # Query once and reuse the answer: calling doi.exists() a second time
    # would issue a redundant request, and the printed value could differ
    # from the one the early return below is based on.
    exists = doi.exists()
    print(' exists:', 'yes' if exists else 'no')
    if not exists:
        return

    print(' ra:', doi.ra())
    print(' handles:', doi.handles())
    # Citations are truncated to keep the output to roughly one line each.
    print('citation:', doi.citation()[:140])
    print('citation:', doi.citation(Style.MLA)[:140])
    print('citation:', doi.citation(Style.BIBTEX)[:140])
def main(argv):
    """Prints information for every DOI given on the command line.

    Args:
      argv: Command-line arguments; argv[0] is the program name and each
        following entry is passed to print_info with network lookups enabled.

    Raises:
      app.UsageError: If no DOI argument was supplied.
    """
    doi_args = argv[1:]
    if not doi_args:
        raise app.UsageError('Too few command-line arguments.')

    print()
    for doi_arg in doi_args:
        print_info(doi_arg, have_internet=True)
        # Blank line after each report.
        print()


if __name__ == '__main__':
    app.run(main)
doi '10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2' 'https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2'
Info for "10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2": <Doi 10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2>
url: https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2
exists: yes
ra: Crossref
handles: {'responseCode': 1, 'handle': '10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2', 'values': [{'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://pubs.geoscienceworld.org/geology/article/31/3/203-206/197720'}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}, {'index': 700050, 'type': '700050', 'data': {'format': 'string', 'value': '200701311441580000'}, 'ttl': 86400, 'timestamp': '2018-03-08T20:12:00Z'}, {'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '0.na/10.1130', 'index': 200, 'permissions': '111111110010'}}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}]}
citation: Schwehr, K., & Tauxe, L. (2003). Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods. Geology, 3
citation: Schwehr, Kurt, and Lisa Tauxe. “Characterization of Soft-Sediment Deformation: Detection of Cryptoslumps Using Magnetic Methods.” Geolog
citation: @article{Schwehr_2003, title={Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods}, volume={31}
Info for "https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2": <Doi 10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2>
url: https://doi.org/10.1130/0091-7613%282003%29031%3C0203%3ACOSSDD%3E2.0.CO%3B2
exists: yes
ra: Crossref
handles: {'responseCode': 1, 'handle': '10.1130/0091-7613(2003)031<0203:COSSDD>2.0.CO;2', 'values': [{'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://pubs.geoscienceworld.org/geology/article/31/3/203-206/197720'}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}, {'index': 700050, 'type': '700050', 'data': {'format': 'string', 'value': '200701311441580000'}, 'ttl': 86400, 'timestamp': '2018-03-08T20:12:00Z'}, {'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '0.na/10.1130', 'index': 200, 'permissions': '111111110010'}}, 'ttl': 86400, 'timestamp': '2017-07-07T10:39:51Z'}]}
citation: Schwehr, K., & Tauxe, L. (2003). Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods. Geology, 3
citation: Schwehr, Kurt, and Lisa Tauxe. “Characterization of Soft-Sediment Deformation: Detection of Cryptoslumps Using Magnetic Methods.” Geolog
citation: @article{Schwehr_2003, title={Characterization of soft-sediment deformation: Detection of cryptoslumps using magnetic methods}, volume={31}
doi info:doi/10.3334/ORNLDAAC/1328 https://doi.org/10.3334/ORNLDAAC/1328 http://dx.doi.org/10.3334/ornldaac/1328 10.3334/ornldaac/1328/DOES_NOT_EXIST
Info for "info:doi/10.3334/ORNLDAAC/1328": <Doi 10.3334/ORNLDAAC/1328>
url: https://doi.org/10.3334/ORNLDAAC/1328
exists: yes
ra: DataCite
handles: {'responseCode': 1, 'handle': '10.3334/ORNLDAAC/1328', 'values': [{'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '10.admin/codata', 'index': 300, 'permissions': '111111111111'}}, 'ttl': 86400, 'timestamp': '2019-10-09T10:30:52Z'}, {'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1328'}, 'ttl': 86400, 'timestamp': '2017-04-19T17:18:12Z'}]}
citation: Thornton, P. E., Thornton, M. M., Mayer, B. W., Wei, Y., Devarakonda, R., Vose, R. S., &amp; Cook, R. B. (2016). <i>Daymet: Daily Surface We
citation: Thornton, P. E., et al. <i>Daymet: Daily Surface Weather Data on a 1-Km Grid for North America, Version 3</i>. 3.4, ORNL Distributed Active
citation: @article{thornton_thornton_mayer_wei_devarakonda_vose_cook_2016, title={Daymet: Daily Surface Weather Data on a 1-km Grid for North America
Info for "https://doi.org/10.3334/ORNLDAAC/1328": <Doi 10.3334/ORNLDAAC/1328>
url: https://doi.org/10.3334/ORNLDAAC/1328
exists: yes
ra: DataCite
handles: {'responseCode': 1, 'handle': '10.3334/ORNLDAAC/1328', 'values': [{'index': 100, 'type': 'HS_ADMIN', 'data': {'format': 'admin', 'value': {'handle': '10.admin/codata', 'index': 300, 'permissions': '111111111111'}}, 'ttl': 86400, 'timestamp': '2019-10-09T10:30:52Z'}, {'index': 1, 'type': 'URL', 'data': {'format': 'string', 'value': 'https://daac.ornl.gov/cgi-bin/dsviewer.pl?ds_id=1328'}, 'ttl': 86400, 'timestamp': '2017-04-19T17:18:12Z'}]}
citation: Thornton, P. E., Thornton, M. M., Mayer, B. W., Wei, Y., Devarakonda, R., Vose, R. S., &amp; Cook, R. B. (2016). <i>Daymet: Daily Surface We
citation: Thornton, P. E., et al. <i>Daymet: Daily Surface Weather Data on a 1-Km Grid for North America, Version 3</i>. 3.4, ORNL Distributed Active
citation: @article{thornton_thornton_mayer_wei_devarakonda_vose_cook_2016, title={Daymet: Daily Surface Weather Data on a 1-km Grid for North America
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment