Last active
September 16, 2016 22:55
-
-
Save benmarwick/50aa4c39b7d955d0ec40 to your computer and use it in GitHub Desktop.
Experimenting with the opencontext.org API using R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# To get to animal bones in Turkey:
# (the `prop` parameters narrow the set to the animal-bone category and,
# judging by the slug, a von den Driesch bone measurement -- TODO confirm
# against the Open Context API documentation)
q <- "http://146.148.79.138/sets/Turkey.json?geodeep=10&prop=oc-gen-cat-animal-bone&prop=oc-zoo-anatomical-meas---oc-zoo-von-den-driesch-bone-meas---oc-zoo-bd"

# httr method ----
library(devtools)
# Only install httr from GitHub when it is not already available, so that
# re-running the script does not reinstall the package every time.
if (!requireNamespace("httr", quietly = TRUE)) {
  install_github("hadley/httr")
}
library(httr)
library(jsonlite)

# Send the request and keep the raw response object.
out <- GET(url = q)
# did it work?
http_status(out)
# Parse the response body into R objects.
data <- content(out)

# what do we have?
data
length(data)
names(data)
str(data)
str(data$type)
str(data$features)
names(data$features)
data$features[[3]]

# or jsonlite method (don't need httr) ----
library(jsonlite)
# fromJSON() downloads and parses the URL in one step.
data <- fromJSON(q)
str(data)
# Show only the components that were simplified into data frames.
# vapply() keeps the mask logical even when `data` is empty.
data[vapply(data, is.data.frame, logical(1))]
# NOTE(review): `data` is the whole parsed list here, not a single data
# frame; write.csv() will coerce it with as.data.frame(), which fails if the
# components differ in length -- confirm this is the intended object.
write.csv(data, "opencontext.csv")

# http://opencontext.org/about/services
###########################################################
# Retrieve items located in the region bounded by the coordinates below.
N_min <- 19.382
E_min <- 34.800
N_max <- 29.667
E_max <- 44.000

# The bBox parameter is built as: E_min, N_min, E_max, N_max
query <- paste0(
  "http://opencontext.org/sets/.json?bBox=",
  E_min, ",", N_min, ",", E_max, ",", N_max
)
out <- GET(url = query)
# did it work?
http_status(out)
data <- content(out)

# what do we have?
length(data)
names(data)
data$results[[2]]
data$facets$context

# find the projects within the region
# (each facet entry becomes one row of the data frame)
projects <- do.call(rbind.data.frame, data$facets$project)
projects$name

# find the categories of evidence in the region
# and counts of each
categories <- do.call(rbind.data.frame, data$facets$category)

# find all the records of one category of evidence
# isTRUE() turns a missing `category` (NULL == "..." gives logical(0)) into
# FALSE, and vapply() guarantees a logical mask usable for subsetting.
is_animal_bone <- vapply(
  data$results,
  function(i) isTRUE(i$category == "Animal Bone"),
  logical(1)
)
animalbone <- data$results[is_animal_bone]

# One row per record, built from each record's `var_vals`.
# The original line was missing a closing parenthesis; lapply() is used so
# that do.call() always receives a plain list of per-record values.
do.call(rbind.data.frame, lapply(animalbone, function(i) i$var_vals))
do.call(rbind.data.frame, animalbone)
##########################################################
# get items by date range
# from Open Context with dates ranging from 1500 BCE to 1000 CE
# (negative years are BCE).
start <- -1500 # early
end <- 1000 # late

# Use the `end` variable itself; quoting it would send the literal text
# "end" as the t-end parameter.
qry <- paste0(
  "http://opencontext.org/sets/.json?t-start=", start,
  "&t-end=", end
)
out <- GET(url = qry)
http_status(out)
data <- content(out)

# Replace a missing (NULL) field with NA so that unlist() keeps one entry
# per result instead of silently dropping it.
# http://r.789695.n4.nabble.com/need-help-with-unlist-losing-NULL-values-td4633837.html
null_to_na <- function(x) {
  if (is.null(x)) NA else x
}

# get project names
# Iterating over the results directly avoids the 1:length() trap when there
# are no results.
projs <- unlist(
  lapply(data$results, function(res) null_to_na(res$project))
)

# get tentative dating
tent_dates <- unlist(
  lapply(
    data$results,
    function(res) null_to_na(res$var_vals$`Tentative Date`)
  )
)

# One row per result: project name alongside its tentative date (NA if absent).
data.frame(projs = projs, tent_dates = tent_dates)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Looks good. The new version of the API has a bounding box query too. But it is not documented because I made a goof translating between GeoJSON ordering of coordinates and Solr's ordering of coordinates; see comments in the commit:
ekansa/open-context-py@df93460
I'm still reindexing all of Open Context to fix this goof. For the next few days (as of March 27, 2015), the bounding box queries will only work for the coordinates between 0,0 and 90,90. The new bounding box querying adopts the GeoJSON coordinate pattern (doing the same thing as Pleiades). The order is:
See example.
http://146.148.79.138/sets/?disc-bbox=10,10,45,45