Skip to content

Instantly share code, notes, and snippets.

@guidocor
Last active May 13, 2017 08:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guidocor/5f5188d301b971cf61684026c2bfc6a4 to your computer and use it in GitHub Desktop.
Save guidocor/5f5188d301b971cf61684026c2bfc6a4 to your computer and use it in GitHub Desktop.
# Snippet to call and install all libraries required with pacman package
if (!require('pacman')) install.packages('pacman'); library('pacman')
p_load(tidyverse, yarrr, trimr, lattice, doBy, Rmisc, reshape2)
# Dataset taken from trimr package. Check it out and
# don't miss this other way to do Reaction Time outlier detection
# at https://cran.r-project.org/web/packages/trimr/vignettes/trimr-vignette.html
data(exampleData)
head(exampleData)
# First of all: watch your data with lattice
# Several outliers appears at the boxplot
bwplot(~rt | participant, data = exampleData)
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch"))
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch"))
clean_rt <- function(df, rt, subject, form_extra="", borders = "")
{
# '@df = data.frame were execute the outlier detection
# '@rt = string with the reaction time colname
# '@form_extra = categories we want to split.
# '@border = a vector with the upper and lower bounds, example: c("200", "4000") if we want to
# delete rt lower than 200, and upper than 4000.
if(form_extra != ""){
function_to_apply = as.formula(paste(rt, "~", subject, "+", form_extra))
} else {
function_to_apply = as.formula(paste(rt, "~", subject))
}
require(doBy)
df <-data.frame(merge(df,
summaryBy(function_to_apply, data = df,
FUN = function(x) {
# notice that changing the function
# you can do other things
c(lower = boxplot.stats(x)$stats[1],
upper = boxplot.stats(x)$stats[5])
}), c(subject)))
# Create two new response and rt variables to manipulate
df$clean.rt<-df[, rt]
lower_name = paste0(rt, ".lower")
upper_name = paste0(rt, ".upper")
# Remove from the new variables the extreme values -> Below Q1-1.5*IQR or Over Q3+1.5*IQR
df$clean.rt[
which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name])]<-NA
if(borders!= ""){
df$clean.rt[
which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name]) | df[,rt] <= border[1] | df[,rt] >= border[2] ]<-NA
}
# Make the new rt variable numeric again
df$clean.rt<-as.numeric(as.character(df$clean.rt))
return(df)
}
# Apply the new function to our data frame
df <- clean_rt(exampleData, rt="rt", subject="participant")
df %>% head
# Check with lattice the distribution
bwplot(~clean.rt | participant, df)
# Calculate how many trials removed
(trials_removed <- plyr::rename(merge(c(sum(is.na(df$clean.rt))),
c(100*sum(is.na(df$clean.rt))/length(df$clean.rt))),
c("x" = "Removed", y = "% of total")) )
# Info by participant and block
(trials_removed_parts <- plyr::rename(data.frame(summaryBy(clean.rt ~ participant, data = df,
FUN = function(x) {
c(sum(is.na(x)), round(100*sum(is.na(x))/length(x),2))
})), c("clean.rt.FUN1" = "Removed", "clean.rt.FUN2" = "Removed %")) )
# create the summary of rt by it's median, quartiles and top deciles
df.summary <- df %>% group_by(., participant, condition) %>%
dplyr::summarise(., rt.median = median(clean.rt, na.rm = TRUE),
q1 =quantile(clean.rt, probs = c(0.25), na.rm=TRUE),
q3 =quantile(clean.rt, probs = c(0.75), na.rm=TRUE),
c1 =quantile(clean.rt, probs = c(0.1), na.rm=TRUE),
c9 =quantile(clean.rt, probs = c(0.9), na.rm=TRUE))
df.summary <- left_join(df.summary, trials_removed_parts)
df.summary
p1 <- function(){ # is easier in this way to save the plot: create a function and call it later
pirateplot(rt.median ~ condition, data = df.summary,
pal = gray(.05),
inf.method = "ci", # default is HDI
inf.within = "participant",
inf.disp = "bean",
main= "That's our title",
theme = 1, sortx = "sequential", jitter.val = 0.1, xlab=" ",
ylab = " ", cex.lab=2.5, gl.col = gray(.12), gl.lwd = 0.1 ,
bean.b.o = 0.9, bean.f.o = 0.5, inf.f.o = 0.4, point.cex = 1.7,
point.o = 0.5, point.pch = 16, cex.axis = 1)
}
bmp("plot.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
p1()
dev.off()
# We want to graph the difference between tasks.
# We have a within measures design, so we remove the between participant variation
# with the Morey method trough the Rmisc package
# check http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
df.norm <- normDataWithin(df.summary, idvar = "participant", measurevar = "rt.median")
# when we deal with few data point, we can use a stripchart.
# Stripchart is a 1 dimension plot of the values. It's incredibly
# powerfull. No data is behind barplot and confidence intervals
# In our case we want to show the change of each participant
plot_stripchart <- function(df, cond, value, participant, title = " "){
# Based on https://www.r-bloggers.com/visualizing-small-scale-paired-data-combining-boxplots-stripcharts-and-confidence-intervals-in-r/
custom_formula <- as.formula(paste0(value, " ~ ", cond))
df.plot <- dcast(df[,c(cond, value ,participant)], as.formula(paste0( participant, "~", cond)) , value.var = value )
stripchart( custom_formula, data=df, vertical=T,pch=16,method="jitter" , main = title, ylab = " ")
conds_label <- unique(as.character(df[[cond]] ) )
pre <- df.plot[[conds_label[1]]]
post <- df.plot[[conds_label[2]]]
s<-seq(length(pre))
segments(rep(1.2,length(pre))[s],pre[s],rep(1.8,length(pre))[s],post[s],col=1,lwd=0.5)
}
bmp("strip.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
plot_stripchart(df.norm, cond="condition", value= "rt.medianNormed",
participant = "participant", title="This is a title")
mtext("Median RT ", side = 2, line = 2, cex = 1.5, font = 2)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment