Skip to content

Instantly share code, notes, and snippets.

@Keiku
Last active February 22, 2017 02:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Keiku/6640452b66c9fbea1b569de7fdfd1c11 to your computer and use it in GitHub Desktop.
Save Keiku/6640452b66c9fbea1b569de7fdfd1c11 to your computer and use it in GitHub Desktop.
Reshaping with tidyr
library("dplyr")
library("tidyr")
library("data.table")
# Sample long-format data: three IDs, each observed in fiscal years 2008 and
# 2009, with two measurements (BMI, sbp) per observation.
# `tibble()` replaces the deprecated `data_frame()` constructor; it is
# re-exported by dplyr, so no extra library() call is required.
smp <- tibble(
  ID = rep(1:3, 2),
  BMI = rep(c(21, 26), 3),
  sbp = rep(c(150, 120), 3),
  nendo = rep(2008:2009, 3)
)
# ID BMI sbp nendo
# 1 1 21 150 2008
# 2 2 26 120 2009
# 3 3 21 150 2008
# 4 1 26 120 2009
# 5 2 21 150 2008
# 6 3 26 120 2009
# Base R approach: widen a plain data.frame copy of `smp`, using `ID` as the
# row identifier and `nendo` as the column that distinguishes repeated records.
smp_reshaped <- stats::reshape(
  data = as.data.frame(smp),
  direction = "wide",
  idvar = "ID",
  timevar = "nendo"
)
# ID BMI.2008 sbp.2008 BMI.2009 sbp.2009
# 1 1 21 150 26 120
# 2 2 21 150 26 120
# 3 3 21 150 26 120
# data.table approach: cast both measurement columns to wide format at once.
# Call the exported `dcast()` generic rather than the `dcast.data.table`
# method directly; dispatch on the data.table input reaches the same method.
smp_reshaped <- dcast(
  as.data.table(smp),
  ID ~ nendo,
  value.var = c("BMI", "sbp"),
  fill = NA,
  fun.aggregate = NULL
)
# ID BMI_2008 BMI_2009 sbp_2008 sbp_2009
# 1: 1 21 26 150 120
# 2: 2 21 26 150 120
# 3: 3 21 26 150 120
#' Reshape a long data frame to wide format with tidyr
#'
#' Keeps only the `id`, `by` and `valiables` columns, stacks the value
#' columns, joins each value-column name with the `by` value(s) using `sep`,
#' and spreads the result so every "<variable><sep><by>" combination becomes
#' its own column.
#'
#' @param df A data frame (or tibble) in long format.
#' @param id Character vector naming the identifier column(s).
#' @param by Character vector naming the column(s) whose values are appended
#'   to the new column names.
#' @param valiables Character vector naming the value columns to widen
#'   (the spelling of this argument is kept for backward compatibility).
#' @param sep Separator placed between the variable name and the `by` value.
#' @return The widened data frame, one row per `id` combination.
tidyr_reshape <- function(df, id, by, valiables, sep) {
  # Uniqueness must be judged on the key columns only. Counting distinct
  # whole rows would let two rows with the same (id, by) key but different
  # values slip through this guard.
  keys <- as.data.frame(df)[, c(id, by), drop = FALSE]
  if (anyDuplicated(keys) > 0L) {
    stop("Duplicate identifiers for rows")
  }

  # gather()/spread() are kept (rather than pivot_*) because spread() sorts
  # the output rows and the generated columns, which callers may rely on.
  # all_of() selects columns from character vectors without the deprecated
  # underscore-suffixed (lazyeval) verbs.
  df %>%
    select(all_of(c(id, by, valiables))) %>%
    gather(key = "valiables", value = "value", all_of(valiables)) %>%
    unite(col = "by_valiables", all_of(c("valiables", by)), sep = sep) %>%
    spread(key = "by_valiables", value = "value", fill = NA)
}
# tidyr approach: widen BMI and sbp by fiscal year, naming the new columns
# "<variable>_<nendo>".
smp_reshaped <- tidyr_reshape(
  smp,
  id = "ID",
  by = "nendo",
  valiables = c("BMI", "sbp"),
  sep = "_"
)
# A tibble: 3 x 5
# ID BMI_2008 BMI_2009 sbp_2008 sbp_2009
# * <int> <dbl> <dbl> <dbl> <dbl>
# 1 1 21 26 150 120
# 2 2 21 26 150 120
# 3 3 21 26 150 120
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment