Skip to content

Instantly share code, notes, and snippets.

@tts
Last active January 2, 2016 22:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tts/8369640 to your computer and use it in GitHub Desktop.
Save tts/8369640 to your computer and use it in GitHub Desktop.
Prunes data for querying GBIF. Original data from Rainforest Foundation Norway
##########################################################################
#
# 11.1.2014 Tuija Sonkkila
#
# Function to transform a dataframe to GeoJSON.
#
# Symbol from the Maki project http://mapbox.com/maki/ or
# a single alphanumeric character
# See https://help.github.com/articles/mapping-geojson-files-on-github
#
# Note that here, you'll get the same symbol and color on every point
#
##########################################################################
library(RJSONIO)
togeojson <- function(input = NULL, symbol = "park2", color = "#ff4444"){
f <- deparse(substitute(input))
l <- list(type = "FeatureCollection",
features = lapply(seq(nrow(input)),
function(x) list(type = "Feature",
geometry = list(type = "Point",
coordinates = array(input[x, c("longitude", "latitude")])),
properties = list(markersymbol = symbol,
markercolor = color,
popupContent = input[x, "name"]))))
tofile <- paste0(f, ".geojson")
cat(toJSON(l), file=tofile)
# I couldn't get my R environment to accept '-' as part of the list name.
# Here, I'll use the Unix sed tool to get it. I'm sure there is an R way
# to accomplish the same thing (first unlist l, gsub, and then to list again?)
try(system(paste("sed -i 's/marker/marker-/g' ", tofile, sep=""), intern = TRUE, ignore.stderr = TRUE))
}
###############################################################################################
#
# 11.1.2014 Tuija Sonkkila
#
# Data from Rainforest Foundation Norway
# http://www.regnskog.no/no/bevisst-forbruker/tropisk-t%C3%B8mmer/oversikt-tropiske-treslag
#
# Scraped with Scraper, and saved as a spreadsheet on Google Drive
#
# Prunes data for querying the GBIF API with the rgbif wrapper by rOpenSci
#
# Saves GBIF return data by status as 1) .Rda, and 2) GeoJSON
#
##############################################################################################
library(RCurl)
library(rgbif)
library(plyr)
source("togeojson.R")
# http://stackoverflow.com/questions/17411313/ssl-verification-causes-rcurl-and-httr-to-break-on-a-website-that-should-be-le
# May also work for you without the ssl.verifypeer argument. Didn't for me under Windows
treedata <- getURL("https://docs.google.com/spreadsheet/pub?key=0AvfW9KgU1XzhdDNFbzBqb1RWWkl4bVhnR1BlTlJkb2c&output=csv",
ssl.verifypeer = FALSE)
trdata <- read.csv(text = treedata)
d <- trdata
# Remove whitespace
d2 <- as.data.frame(apply(d, 2, function(x) gsub('\\s\\s+', '', x)), stringsAsFactors = FALSE)
# Not rows with '- se'in Text
d2 <- d2[!substr(d2$Text, 1, 4) == '- se',]
# No braces
d2$Latin <- gsub('\\(', '', d2$Latin)
d2$Latin <- gsub('\\)', '', d2$Latin)
# Some further cleaning
d2 <- d2[!substr(d2$Tree,1,5) == 'Yakal',]
d2[d2$Tree == "Ramin",]$Latin <- "Gonystylus spp."
d2[d2$Tree == 'Almendrillo',]$Latin <- "Dipteryx odorata"
d2[d2$Tree == 'Gabon',]$Latin <- "Aucoumea klaineana"
d2[d2$Tree == 'Parana',]$Latin <- "Araucaria angustifolia"
# Take the first Latin name only if several
d2$Latin1st <- sapply(d2$Latin, function(x) unlist(strsplit(x, "[,/]"))[1])
# Separate the status info from Text
d2$Status <- sapply(d2$Text, function(x) gsub("^.*\\(([A-Z][A-Z].*?)\\).*$", "\\1", x))
# Continue with a subset of cols
trees <- d2[ ,c("Tree", "Status", "Latin1st")]
names(trees) <- c("Name", "Status", "Latin")
# EX - Extinct (utdødd)
# RE - Regionally Extinct (regionalt utdødd)
# CR - Critically Endangered (kritisk truet)
# EN - Endangered (sterkt truet)
# VU - Vulnerable (sårbar)
# NT - Near Threatened (Nær truet)
# Note: http://www.andys-sundaypink.com/i/r-error-during-write-table/#comment-116
trees$EX <- sapply(trees$Status, function(x) grepl("EX", x))
trees$RE <- sapply(trees$Status, function(x) grepl("RE", x))
trees$CR <- sapply(trees$Status, function(x) grepl("CR", x))
trees$EN <- sapply(trees$Status, function(x) grepl("EN", x))
trees$VU <- sapply(trees$Status, function(x) grepl("VU", x))
trees$NT <- sapply(trees$Status, function(x) grepl("NT", x))
# Other statuses
trees$Other <- sapply(trees$Status, function(x) grepl("[ERCVN][XERNUT]", x))
# Drop the combined Status col
trees <- trees[, c("Name", "Latin", "EX", "RE", "CR", "EN", "VU", "NT", "Other")]
ex <- trees[trees$EX == TRUE,]
re <- trees[trees$RE == TRUE,]
cr <- trees[trees$CR == TRUE,]
en <- trees[trees$EN == TRUE,]
vu <- trees[trees$VU == TRUE,]
nt <- trees[trees$NT == TRUE,]
other <- trees[trees$Other == FALSE,]
if (exists("datdf")){
remove(datdf)
}
qgbif <- function(x) {
splist <- x$Latin
keys <- sapply(splist, function(x) name_backbone(name=x, kingdom='plants')$speciesKey, USE.NAMES=FALSE)
keys <- keys[!sapply(keys, is.null)]
if (!length(keys)==0) {
dat <- occ_search(taxonKey=keys, limit=100, return='data', georeferenced=TRUE, minimal=FALSE)
wdata <- dat[!grepl("^no data found", dat)]
datdf <- ldply(wdata)
assign('datdf', datdf, envir=.GlobalEnv)
}
}
qgbif(ex)
if (exists("datdf")) {
Extinct <- datdf
save(Extinct, file="Extinct.Rda")
togeojson(Extinct)
remove(datdf)
}
qgbif(cr)
if (exists("datdf")) {
Critically_Endangered <- datdf
save(Critically_Endangered, file="Critically_Endangered.Rda")
togeojson(Critically_Endangered)
remove(datdf)
}
qgbif(en)
if (exists("datdf")) {
Endangered <- datdf
save(Endangered, file="Endangered.Rda")
togeojson(Endangered)
remove(datdf)
}
qgbif(vu)
if (exists("datdf")) {
Vulnerable <- datdf
save(Vulnerable, file="Vulnerable.Rda")
togeojson(Vulnerable)
remove(datdf)
}
qgbif(nt)
if (exists("datdf")) {
Near_Threatened <- datdf
save(Near_Threatened, file="Near_Threatened.Rda")
togeojson(Near_Threatened)
remove(datdf)
}
qgbif(other)
if (exists("datdf")) {
Other <- datdf
save(Other, file="Other.Rda")
togeojson(Other)
remove(datdf)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment