Last active
January 2, 2016 22:19
-
-
Save tts/8369640 to your computer and use it in GitHub Desktop.
Prunes data for querying GBIF. Original data from Rainforest Foundation Norway
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################################## | |
# | |
# 11.1.2014 Tuija Sonkkila | |
# | |
# Function to transform a dataframe to GeoJSON. | |
# | |
# Symbol from the Maki project http://mapbox.com/maki/ or | |
# a single alphanumeric character | |
# See https://help.github.com/articles/mapping-geojson-files-on-github | |
# | |
# Note that here, you'll get the same symbol and color on every point | |
# | |
########################################################################## | |
library(RJSONIO) | |
togeojson <- function(input = NULL, symbol = "park2", color = "#ff4444"){ | |
f <- deparse(substitute(input)) | |
l <- list(type = "FeatureCollection", | |
features = lapply(seq(nrow(input)), | |
function(x) list(type = "Feature", | |
geometry = list(type = "Point", | |
coordinates = array(input[x, c("longitude", "latitude")])), | |
properties = list(markersymbol = symbol, | |
markercolor = color, | |
popupContent = input[x, "name"])))) | |
tofile <- paste0(f, ".geojson") | |
cat(toJSON(l), file=tofile) | |
# I couldn't get my R environment to accept '-' as part of the list name. | |
# Here, I'll use the Unix sed tool to get it. I'm sure there is an R way | |
# to accomplish the same thing (first unlist l, gsub, and then to list again?) | |
try(system(paste("sed -i 's/marker/marker-/g' ", tofile, sep=""), intern = TRUE, ignore.stderr = TRUE)) | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################################### | |
# | |
# 11.1.2014 Tuija Sonkkila | |
# | |
# Data from Rainforest Foundation Norway | |
# http://www.regnskog.no/no/bevisst-forbruker/tropisk-t%C3%B8mmer/oversikt-tropiske-treslag | |
# | |
# Scraped with Scraper, and saved as a spreadsheet on Google Drive | |
# | |
# Prunes data for querying the GBIF API with the rgbif wrapper by rOpenSci | |
# | |
# Saves GBIF return data by status as 1) .Rda, and 2) GeoJSON | |
# | |
############################################################################################## | |
library(RCurl) | |
library(rgbif) | |
library(plyr) | |
source("togeojson.R") | |
# http://stackoverflow.com/questions/17411313/ssl-verification-causes-rcurl-and-httr-to-break-on-a-website-that-should-be-le | |
# May also work for you without the ssl.verifypeer argument. Didn't for me under Windows | |
treedata <- getURL("https://docs.google.com/spreadsheet/pub?key=0AvfW9KgU1XzhdDNFbzBqb1RWWkl4bVhnR1BlTlJkb2c&output=csv", | |
ssl.verifypeer = FALSE) | |
trdata <- read.csv(text = treedata) | |
d <- trdata | |
# Remove whitespace | |
d2 <- as.data.frame(apply(d, 2, function(x) gsub('\\s\\s+', '', x)), stringsAsFactors = FALSE) | |
# Not rows with '- se'in Text | |
d2 <- d2[!substr(d2$Text, 1, 4) == '- se',] | |
# No braces | |
d2$Latin <- gsub('\\(', '', d2$Latin) | |
d2$Latin <- gsub('\\)', '', d2$Latin) | |
# Some further cleaning | |
d2 <- d2[!substr(d2$Tree,1,5) == 'Yakal',] | |
d2[d2$Tree == "Ramin",]$Latin <- "Gonystylus spp." | |
d2[d2$Tree == 'Almendrillo',]$Latin <- "Dipteryx odorata" | |
d2[d2$Tree == 'Gabon',]$Latin <- "Aucoumea klaineana" | |
d2[d2$Tree == 'Parana',]$Latin <- "Araucaria angustifolia" | |
# Take the first Latin name only if several | |
d2$Latin1st <- sapply(d2$Latin, function(x) unlist(strsplit(x, "[,/]"))[1]) | |
# Separate the status info from Text | |
d2$Status <- sapply(d2$Text, function(x) gsub("^.*\\(([A-Z][A-Z].*?)\\).*$", "\\1", x)) | |
# Continue with a subset of cols | |
trees <- d2[ ,c("Tree", "Status", "Latin1st")] | |
names(trees) <- c("Name", "Status", "Latin") | |
# EX - Extinct (utdødd) | |
# RE - Regionally Extinct (regionalt utdødd) | |
# CR - Critically Endangered (kritisk truet) | |
# EN - Endangered (sterkt truet) | |
# VU - Vulnerable (sårbar) | |
# NT - Near Threatened (Nær truet) | |
# Note: http://www.andys-sundaypink.com/i/r-error-during-write-table/#comment-116 | |
trees$EX <- sapply(trees$Status, function(x) grepl("EX", x)) | |
trees$RE <- sapply(trees$Status, function(x) grepl("RE", x)) | |
trees$CR <- sapply(trees$Status, function(x) grepl("CR", x)) | |
trees$EN <- sapply(trees$Status, function(x) grepl("EN", x)) | |
trees$VU <- sapply(trees$Status, function(x) grepl("VU", x)) | |
trees$NT <- sapply(trees$Status, function(x) grepl("NT", x)) | |
# Other statuses | |
trees$Other <- sapply(trees$Status, function(x) grepl("[ERCVN][XERNUT]", x)) | |
# Drop the combined Status col | |
trees <- trees[, c("Name", "Latin", "EX", "RE", "CR", "EN", "VU", "NT", "Other")] | |
ex <- trees[trees$EX == TRUE,] | |
re <- trees[trees$RE == TRUE,] | |
cr <- trees[trees$CR == TRUE,] | |
en <- trees[trees$EN == TRUE,] | |
vu <- trees[trees$VU == TRUE,] | |
nt <- trees[trees$NT == TRUE,] | |
other <- trees[trees$Other == FALSE,] | |
if (exists("datdf")){ | |
remove(datdf) | |
} | |
qgbif <- function(x) { | |
splist <- x$Latin | |
keys <- sapply(splist, function(x) name_backbone(name=x, kingdom='plants')$speciesKey, USE.NAMES=FALSE) | |
keys <- keys[!sapply(keys, is.null)] | |
if (!length(keys)==0) { | |
dat <- occ_search(taxonKey=keys, limit=100, return='data', georeferenced=TRUE, minimal=FALSE) | |
wdata <- dat[!grepl("^no data found", dat)] | |
datdf <- ldply(wdata) | |
assign('datdf', datdf, envir=.GlobalEnv) | |
} | |
} | |
qgbif(ex) | |
if (exists("datdf")) { | |
Extinct <- datdf | |
save(Extinct, file="Extinct.Rda") | |
togeojson(Extinct) | |
remove(datdf) | |
} | |
qgbif(cr) | |
if (exists("datdf")) { | |
Critically_Endangered <- datdf | |
save(Critically_Endangered, file="Critically_Endangered.Rda") | |
togeojson(Critically_Endangered) | |
remove(datdf) | |
} | |
qgbif(en) | |
if (exists("datdf")) { | |
Endangered <- datdf | |
save(Endangered, file="Endangered.Rda") | |
togeojson(Endangered) | |
remove(datdf) | |
} | |
qgbif(vu) | |
if (exists("datdf")) { | |
Vulnerable <- datdf | |
save(Vulnerable, file="Vulnerable.Rda") | |
togeojson(Vulnerable) | |
remove(datdf) | |
} | |
qgbif(nt) | |
if (exists("datdf")) { | |
Near_Threatened <- datdf | |
save(Near_Threatened, file="Near_Threatened.Rda") | |
togeojson(Near_Threatened) | |
remove(datdf) | |
} | |
qgbif(other) | |
if (exists("datdf")) { | |
Other <- datdf | |
save(Other, file="Other.Rda") | |
togeojson(Other) | |
remove(datdf) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment