Last active
May 13, 2017 08:46
-
-
Save guidocor/5f5188d301b971cf61684026c2bfc6a4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Snippet to call and install all libraries required with pacman package | |
if (!require('pacman')) install.packages('pacman'); library('pacman') | |
p_load(tidyverse, yarrr, trimr, lattice, doBy, Rmisc, reshape2) | |
# Dataset taken from trimr package. Check it out and | |
# don't miss this other way to do Reaction Time outlier detection | |
# at https://cran.r-project.org/web/packages/trimr/vignettes/trimr-vignette.html | |
data(exampleData) | |
head(exampleData) | |
# First of all: watch your data with lattice | |
# Several outliers appears at the boxplot | |
bwplot(~rt | participant, data = exampleData) | |
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch")) | |
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch")) | |
clean_rt <- function(df, rt, subject, form_extra="", borders = "") | |
{ | |
# '@df = data.frame were execute the outlier detection | |
# '@rt = string with the reaction time colname | |
# '@form_extra = categories we want to split. | |
# '@border = a vector with the upper and lower bounds, example: c("200", "4000") if we want to | |
# delete rt lower than 200, and upper than 4000. | |
if(form_extra != ""){ | |
function_to_apply = as.formula(paste(rt, "~", subject, "+", form_extra)) | |
} else { | |
function_to_apply = as.formula(paste(rt, "~", subject)) | |
} | |
require(doBy) | |
df <-data.frame(merge(df, | |
summaryBy(function_to_apply, data = df, | |
FUN = function(x) { | |
# notice that changing the function | |
# you can do other things | |
c(lower = boxplot.stats(x)$stats[1], | |
upper = boxplot.stats(x)$stats[5]) | |
}), c(subject))) | |
# Create two new response and rt variables to manipulate | |
df$clean.rt<-df[, rt] | |
lower_name = paste0(rt, ".lower") | |
upper_name = paste0(rt, ".upper") | |
# Remove from the new variables the extreme values -> Below Q1-1.5*IQR or Over Q3+1.5*IQR | |
df$clean.rt[ | |
which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name])]<-NA | |
if(borders!= ""){ | |
df$clean.rt[ | |
which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name]) | df[,rt] <= border[1] | df[,rt] >= border[2] ]<-NA | |
} | |
# Make the new rt variable numeric again | |
df$clean.rt<-as.numeric(as.character(df$clean.rt)) | |
return(df) | |
} | |
# Apply the new function to our data frame | |
df <- clean_rt(exampleData, rt="rt", subject="participant") | |
df %>% head | |
# Check with lattice the distribution | |
bwplot(~clean.rt | participant, df) | |
# Calculate how many trials removed | |
(trials_removed <- plyr::rename(merge(c(sum(is.na(df$clean.rt))), | |
c(100*sum(is.na(df$clean.rt))/length(df$clean.rt))), | |
c("x" = "Removed", y = "% of total")) ) | |
# Info by participant and block | |
(trials_removed_parts <- plyr::rename(data.frame(summaryBy(clean.rt ~ participant, data = df, | |
FUN = function(x) { | |
c(sum(is.na(x)), round(100*sum(is.na(x))/length(x),2)) | |
})), c("clean.rt.FUN1" = "Removed", "clean.rt.FUN2" = "Removed %")) ) | |
# create the summary of rt by it's median, quartiles and top deciles | |
df.summary <- df %>% group_by(., participant, condition) %>% | |
dplyr::summarise(., rt.median = median(clean.rt, na.rm = TRUE), | |
q1 =quantile(clean.rt, probs = c(0.25), na.rm=TRUE), | |
q3 =quantile(clean.rt, probs = c(0.75), na.rm=TRUE), | |
c1 =quantile(clean.rt, probs = c(0.1), na.rm=TRUE), | |
c9 =quantile(clean.rt, probs = c(0.9), na.rm=TRUE)) | |
df.summary <- left_join(df.summary, trials_removed_parts) | |
df.summary | |
p1 <- function(){ # is easier in this way to save the plot: create a function and call it later | |
pirateplot(rt.median ~ condition, data = df.summary, | |
pal = gray(.05), | |
inf.method = "ci", # default is HDI | |
inf.within = "participant", | |
inf.disp = "bean", | |
main= "That's our title", | |
theme = 1, sortx = "sequential", jitter.val = 0.1, xlab=" ", | |
ylab = " ", cex.lab=2.5, gl.col = gray(.12), gl.lwd = 0.1 , | |
bean.b.o = 0.9, bean.f.o = 0.5, inf.f.o = 0.4, point.cex = 1.7, | |
point.o = 0.5, point.pch = 16, cex.axis = 1) | |
} | |
bmp("plot.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it | |
p1() | |
dev.off() | |
# We want to graph the difference between tasks. | |
# We have a within measures design, so we remove the between participant variation | |
# with the Morey method trough the Rmisc package | |
# check http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/ | |
df.norm <- normDataWithin(df.summary, idvar = "participant", measurevar = "rt.median") | |
# when we deal with few data point, we can use a stripchart. | |
# Stripchart is a 1 dimension plot of the values. It's incredibly | |
# powerfull. No data is behind barplot and confidence intervals | |
# In our case we want to show the change of each participant | |
plot_stripchart <- function(df, cond, value, participant, title = " "){ | |
# Based on https://www.r-bloggers.com/visualizing-small-scale-paired-data-combining-boxplots-stripcharts-and-confidence-intervals-in-r/ | |
custom_formula <- as.formula(paste0(value, " ~ ", cond)) | |
df.plot <- dcast(df[,c(cond, value ,participant)], as.formula(paste0( participant, "~", cond)) , value.var = value ) | |
stripchart( custom_formula, data=df, vertical=T,pch=16,method="jitter" , main = title, ylab = " ") | |
conds_label <- unique(as.character(df[[cond]] ) ) | |
pre <- df.plot[[conds_label[1]]] | |
post <- df.plot[[conds_label[2]]] | |
s<-seq(length(pre)) | |
segments(rep(1.2,length(pre))[s],pre[s],rep(1.8,length(pre))[s],post[s],col=1,lwd=0.5) | |
} | |
bmp("strip.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it | |
plot_stripchart(df.norm, cond="condition", value= "rt.medianNormed", | |
participant = "participant", title="This is a title") | |
mtext("Median RT ", side = 2, line = 2, cex = 1.5, font = 2) | |
dev.off() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment