Created
September 6, 2017 07:26
-
-
Save csjx/115df2d45cf2cd680fccc758697bb4d1 to your computer and use it in GitHub Desktop.
An R script that queries the DataONE Node Registry and extracts a list of dates when each MN became operational
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Query the DataONE Node Registry to find the dates on which Member Nodes
# began operation.
# The DataONE Node Registry service returns its results as XML.
library(httr)
library(xml2)

# The base URL for the DataONE Coordinating Node
cn_base_url <- "https://cn.dataone.org/cn"

# The Node Registry service endpoint (lists all registered nodes)
cn_node_service <- "/v2/node"
nodes_url <- paste0(cn_base_url, cn_node_service)

# Execute the query. Fail loudly on an HTTP error (4xx/5xx) instead of
# handing an error page to the XML parser below.
request <- GET(nodes_url)
stop_for_status(request)
node_list <- read_xml(content(request, as = "text", encoding = "UTF-8"))

# Filter the XML node list:
# - for Member Nodes only (type = mn)
# - excluding identifiers that start with 'urn:node:mn' (D1 replica nodes)
# - keeping only the CN_date_operational property of each remaining node
# The <node> elements carry no namespace prefix (see the sample below), so
# they can be addressed without namespace handling in the XPath.
operational_date_xpath <- paste0(
  "//node[@type='mn' and not(starts-with(identifier, 'urn:node:mn'))]",
  "/property[@key='CN_date_operational']"
)
nodes <- xml_find_all(node_list, operational_date_xpath)

# Construct a node date list of just the text values from the filtered XML.
# The values are left as the raw strings returned by the registry.
dates <- xml_text(nodes)

# FYI: the node_list XML above looks like this (some elements removed for
# brevity):
# <?xml version="1.0" encoding="UTF-8"?>
# <ns3:nodeList xmlns:ns2="http://ns.dataone.org/service/types/v1"
#     xmlns:ns3="http://ns.dataone.org/service/types/v2.0">
#   <node replicate="false" synchronize="true" type="mn" state="up">
#     <identifier>urn:node:RW</identifier>
#     <name>Research Workspace</name>
#     <description>
#       The Research Workspace is a web-based, scientific data management
#       platform that allows researchers to store and share their data.
#     </description>
#     <baseURL>https://dataone.researchworkspace.com/mn</baseURL>
#     <property key="CN_date_operational">2017-07-25T00:00:0.000Z</property>
#   </node>
#   ...
# </ns3:nodeList>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment