Last active
May 23, 2016 12:19
-
-
Save DomBennett/1094e73dfa7d75db112bc2d249e9706d to your computer and use it in GitHub Desktop.
dplyr example with PREDICTS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PREDICTS AND DPLYR
# D. Bennett & L. Graham
# BRIEF:
# PREDICTS is a large dataset compiled from many ecological studies, each of
# which sampled (by counting, trapping, sighting, etc.) a given
# taxonomic group.
# Each study contains information on the number of samples taken,
# the taxonomy of the group, the study area, etc.
# Here we read in a small example dataset of PREDICTS and demonstrate
# how to use dplyr to answer the question: how many habitats does each study represent?
# LIBS | |
library(dplyr) | |
# READ DATA FROM URL AND CONVERT TO A TIBBLE
# The example dataset is downloaded from the URL below, so this step needs a
# working network connection.
predicts <- read.csv("http://onlinelibrary.wiley.com/store/10.1002/ece3.1303/asset/supinfo/ece31303-sup-0002-DataS1.csv?v=1&s=f1c0f0c5a047aa08c65fb48a3186cecc18faa8a0")
# as_tibble() (re-exported by dplyr) replaces tbl_df(), which is deprecated in
# current dplyr releases; the result is the same tibble class.
predicts <- as_tibble(predicts)
# INVESTIGATE
# Count the column names ...
length(colnames(predicts))
# ... which is the same number as the second dimension (the column count)
dim(predicts)[2L]
# Printing is safe: a tibble only shows a short preview, not every row
print(predicts)
# HOW MANY HABITATS ARE REPRESENTED BY EACH STUDY?
# Method 1: for loop
# Identify the studies by creating a new study ID column that joins the
# source ID and the study number
predicts$SSID <- paste0(predicts$Source_ID, '_', predicts$Study_number)
stds <- unique(predicts$SSID)
# Loop through and count the distinct habitats recorded in each study.
# seq_along() is used instead of 1:length(): with no studies, 1:0 would still
# run the body for i = 1 and i = 0 and leave the two vectors at different
# lengths, so the data.frame() call below would fail.
nhabitats <- rep(0, length(stds))
for (i in seq_along(stds)) {
  nhabitats[i] <- length(unique(
    predicts$Predominant_habitat[predicts$SSID == stds[i]]
  ))
}
# One row per study: its ID and its number of distinct habitats
res_1 <- data.frame(stds, nhabitats)
# Method 2: group_by + summarise
# Group the rows by study (this relies on the SSID column added to predicts
# in Method 1), then collapse each group to its count of distinct habitats.
# Both steps are written here as a single nested call.
res_2 <- summarise(
  group_by(predicts, SSID),
  N_habitats = n_distinct(Predominant_habitat)
)
# Method 3: Behold, the power of the pipe!
# Start from the table itself and chain the steps: build the study ID,
# group by it, then count the distinct habitats within each study.
res_3 <- predicts %>%
  mutate(SSID = paste0(Source_ID, '_', Study_number)) %>%
  group_by(SSID) %>%
  summarise(N_habitats = n_distinct(Predominant_habitat))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment