guidocor/clean_and_graph_RT.R

## clean_and_graph_RT.R
# Snippet to call and install all libraries required with pacman package
if (!require('pacman')) install.packages('pacman'); library('pacman')
p_load(tidyverse, yarrr, trimr, lattice, doBy, Rmisc, reshape2)

# Dataset taken from trimr package. Check it out and
# don't miss this other way to do Reaction Time outlier detection
# at https://cran.r-project.org/web/packages/trimr/vignettes/trimr-vignette.html
data(exampleData)
head(exampleData)

# First of all: watch your data with lattice
# Several outliers appears at the boxplot
bwplot(~rt | participant, data = exampleData)
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch"))
#bwplot(~rt | participant, data = exampleData %>% filter(., condition =="Switch"))


clean_rt <- function(df, rt, subject, form_extra="", borders = "")
{
  # '@df = data.frame were execute the outlier detection
  # '@rt = string with the reaction time colname
  # '@form_extra = categories we want to split.
  # '@border = a vector with the upper and lower bounds, example: c("200", "4000") if we want to
  #  delete rt lower than 200, and upper than 4000.
  if(form_extra != ""){
    function_to_apply = as.formula(paste(rt, "~", subject, "+", form_extra))
    } else {
    function_to_apply = as.formula(paste(rt, "~", subject))
    }

   require(doBy)


  df <-data.frame(merge(df,
          summaryBy(function_to_apply, data = df,
                        FUN = function(x) {
                        # notice that changing the function
                        # you can do other things
                          c(lower = boxplot.stats(x)$stats[1],
                            upper = boxplot.stats(x)$stats[5])
                          }), c(subject)))
   # Create two new response and rt variables to manipulate
  df$clean.rt<-df[, rt]
  lower_name = paste0(rt, ".lower")
  upper_name = paste0(rt, ".upper")


  # Remove from the new variables the extreme values -> Below Q1-1.5*IQR or Over Q3+1.5*IQR
  df$clean.rt[
    which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name])]<-NA
  if(borders!= ""){
    df$clean.rt[
      which(df[,rt] <= df[, lower_name] | df[,rt] >= df[, upper_name]) | df[,rt] <= border[1] | df[,rt] >= border[2] ]<-NA
  }


  # Make the new rt variable numeric again
  df$clean.rt<-as.numeric(as.character(df$clean.rt))


  return(df)

}

# Apply the new function to our data frame
df <- clean_rt(exampleData, rt="rt", subject="participant")
df %>% head
# Check with lattice the distribution
bwplot(~clean.rt | participant, df)


# Calculate how many trials removed
(trials_removed <- plyr::rename(merge(c(sum(is.na(df$clean.rt))),
                                     c(100*sum(is.na(df$clean.rt))/length(df$clean.rt))),
                               c("x" = "Removed", y = "% of total"))  )


# Info by participant and block
(trials_removed_parts <- plyr::rename(data.frame(summaryBy(clean.rt ~ participant, data = df,
                                           FUN = function(x) {
                                             c(sum(is.na(x)), round(100*sum(is.na(x))/length(x),2))
                                           })), c("clean.rt.FUN1" = "Removed", "clean.rt.FUN2" = "Removed %")) )

# create the summary of rt by it's median, quartiles and top deciles

df.summary <- df %>% group_by(., participant, condition) %>%
  dplyr::summarise(., rt.median = median(clean.rt, na.rm = TRUE),
                        q1 =quantile(clean.rt, probs = c(0.25), na.rm=TRUE),
                        q3 =quantile(clean.rt, probs = c(0.75), na.rm=TRUE),
                        c1 =quantile(clean.rt, probs = c(0.1), na.rm=TRUE),
                        c9 =quantile(clean.rt, probs = c(0.9), na.rm=TRUE))
df.summary <- left_join(df.summary, trials_removed_parts)
df.summary
p1 <- function(){ # is easier in this way to save the plot: create a function and call it later
pirateplot(rt.median ~ condition, data =  df.summary,
           pal = gray(.05),
           inf.method = "ci", # default is HDI
           inf.within = "participant",
           inf.disp = "bean",
           main= "That's our title",
           theme = 1,  sortx = "sequential", jitter.val = 0.1, xlab=" ",
           ylab = " ", cex.lab=2.5, gl.col = gray(.12), gl.lwd = 0.1 ,
           bean.b.o = 0.9, bean.f.o = 0.5, inf.f.o = 0.4, point.cex = 1.7,
           point.o = 0.5, point.pch = 16, cex.axis = 1)

}

bmp("plot.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
p1()
dev.off()

# We want to graph the difference between tasks.
# We have a within measures design, so we remove the between participant variation
# with the  Morey method trough the Rmisc package
# check http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
df.norm <- normDataWithin(df.summary, idvar = "participant", measurevar = "rt.median")

# when we deal with few data point, we can use a stripchart.
# Stripchart is a 1 dimension plot of the values. It's incredibly
# powerfull. No data is behind barplot and confidence intervals
# In our case we want to show the change of each participant
plot_stripchart <- function(df, cond, value, participant, title = " "){
  # Based on https://www.r-bloggers.com/visualizing-small-scale-paired-data-combining-boxplots-stripcharts-and-confidence-intervals-in-r/
  custom_formula <- as.formula(paste0(value, " ~ ", cond))
  df.plot <- dcast(df[,c(cond, value ,participant)], as.formula(paste0( participant, "~", cond)) , value.var = value )
  stripchart( custom_formula, data=df, vertical=T,pch=16,method="jitter" , main = title, ylab = " ")
  conds_label <- unique(as.character(df[[cond]] ) )
  pre <-  df.plot[[conds_label[1]]]
  post <-  df.plot[[conds_label[2]]]
  s<-seq(length(pre))
  segments(rep(1.2,length(pre))[s],pre[s],rep(1.8,length(pre))[s],post[s],col=1,lwd=0.5)
}

bmp("strip.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
plot_stripchart(df.norm, cond="condition", value= "rt.medianNormed",
                participant = "participant", title="This is a title")
mtext("Median RT ", side = 2, line = 2, cex = 1.5, font = 2)
dev.off()
	# Snippet to call and install all libraries required with pacman package
	if (!require('pacman')) install.packages('pacman'); library('pacman')
	p_load(tidyverse, yarrr, trimr, lattice, doBy, Rmisc, reshape2)

	# Dataset taken from trimr package. Check it out and
	# don't miss this other way to do Reaction Time outlier detection
	# at https://cran.r-project.org/web/packages/trimr/vignettes/trimr-vignette.html
	data(exampleData)
	head(exampleData)

	# First of all: watch your data with lattice
	# Several outliers appears at the boxplot
	bwplot(~rt \| participant, data = exampleData)
	#bwplot(~rt \| participant, data = exampleData %>% filter(., condition =="Switch"))
	#bwplot(~rt \| participant, data = exampleData %>% filter(., condition =="Switch"))


	clean_rt <- function(df, rt, subject, form_extra="", borders = "")
	{
	# '@df = data.frame were execute the outlier detection
	# '@rt = string with the reaction time colname
	# '@form_extra = categories we want to split.
	# '@border = a vector with the upper and lower bounds, example: c("200", "4000") if we want to
	# delete rt lower than 200, and upper than 4000.
	if(form_extra != ""){
	function_to_apply = as.formula(paste(rt, "~", subject, "+", form_extra))
	} else {
	function_to_apply = as.formula(paste(rt, "~", subject))
	}

	require(doBy)



	df <-data.frame(merge(df,
	summaryBy(function_to_apply, data = df,
	FUN = function(x) {
	# notice that changing the function
	# you can do other things
	c(lower = boxplot.stats(x)$stats[1],
	upper = boxplot.stats(x)$stats[5])
	}), c(subject)))
	# Create two new response and rt variables to manipulate
	df$clean.rt<-df[, rt]
	lower_name = paste0(rt, ".lower")
	upper_name = paste0(rt, ".upper")


	# Remove from the new variables the extreme values -> Below Q1-1.5IQR or Over Q3+1.5IQR
	df$clean.rt[
	which(df[,rt] <= df[, lower_name] \| df[,rt] >= df[, upper_name])]<-NA
	if(borders!= ""){
	df$clean.rt[
	which(df[,rt] <= df[, lower_name] \| df[,rt] >= df[, upper_name]) \| df[,rt] <= border[1] \| df[,rt] >= border[2] ]<-NA
	}


	# Make the new rt variable numeric again
	df$clean.rt<-as.numeric(as.character(df$clean.rt))


	return(df)

	}

	# Apply the new function to our data frame
	df <- clean_rt(exampleData, rt="rt", subject="participant")
	df %>% head
	# Check with lattice the distribution
	bwplot(~clean.rt \| participant, df)



	# Calculate how many trials removed
	(trials_removed <- plyr::rename(merge(c(sum(is.na(df$clean.rt))),
	c(100*sum(is.na(df$clean.rt))/length(df$clean.rt))),
	c("x" = "Removed", y = "% of total")) )


	# Info by participant and block
	(trials_removed_parts <- plyr::rename(data.frame(summaryBy(clean.rt ~ participant, data = df,
	FUN = function(x) {
	c(sum(is.na(x)), round(100*sum(is.na(x))/length(x),2))
	})), c("clean.rt.FUN1" = "Removed", "clean.rt.FUN2" = "Removed %")) )

	# create the summary of rt by it's median, quartiles and top deciles

	df.summary <- df %>% group_by(., participant, condition) %>%
	dplyr::summarise(., rt.median = median(clean.rt, na.rm = TRUE),
	q1 =quantile(clean.rt, probs = c(0.25), na.rm=TRUE),
	q3 =quantile(clean.rt, probs = c(0.75), na.rm=TRUE),
	c1 =quantile(clean.rt, probs = c(0.1), na.rm=TRUE),
	c9 =quantile(clean.rt, probs = c(0.9), na.rm=TRUE))
	df.summary <- left_join(df.summary, trials_removed_parts)
	df.summary
	p1 <- function(){ # is easier in this way to save the plot: create a function and call it later
	pirateplot(rt.median ~ condition, data = df.summary,
	pal = gray(.05),
	inf.method = "ci", # default is HDI
	inf.within = "participant",
	inf.disp = "bean",
	main= "That's our title",
	theme = 1, sortx = "sequential", jitter.val = 0.1, xlab=" ",
	ylab = " ", cex.lab=2.5, gl.col = gray(.12), gl.lwd = 0.1 ,
	bean.b.o = 0.9, bean.f.o = 0.5, inf.f.o = 0.4, point.cex = 1.7,
	point.o = 0.5, point.pch = 16, cex.axis = 1)

	}

	bmp("plot.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
	p1()
	dev.off()

	# We want to graph the difference between tasks.
	# We have a within measures design, so we remove the between participant variation
	# with the Morey method trough the Rmisc package
	# check http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
	df.norm <- normDataWithin(df.summary, idvar = "participant", measurevar = "rt.median")

	# when we deal with few data point, we can use a stripchart.
	# Stripchart is a 1 dimension plot of the values. It's incredibly
	# powerfull. No data is behind barplot and confidence intervals
	# In our case we want to show the change of each participant
	plot_stripchart <- function(df, cond, value, participant, title = " "){
	# Based on https://www.r-bloggers.com/visualizing-small-scale-paired-data-combining-boxplots-stripcharts-and-confidence-intervals-in-r/
	custom_formula <- as.formula(paste0(value, " ~ ", cond))
	df.plot <- dcast(df[,c(cond, value ,participant)], as.formula(paste0( participant, "~", cond)) , value.var = value )
	stripchart( custom_formula, data=df, vertical=T,pch=16,method="jitter" , main = title, ylab = " ")
	conds_label <- unique(as.character(df[[cond]] ) )
	pre <- df.plot[[conds_label[1]]]
	post <- df.plot[[conds_label[2]]]
	s<-seq(length(pre))
	segments(rep(1.2,length(pre))[s],pre[s],rep(1.8,length(pre))[s],post[s],col=1,lwd=0.5)
	}

	bmp("strip.bmp", width = 7, height = 5, units = "in" , res = 160) # create the plot to save it
	plot_stripchart(df.norm, cond="condition", value= "rt.medianNormed",
	participant = "participant", title="This is a title")
	mtext("Median RT ", side = 2, line = 2, cex = 1.5, font = 2)
	dev.off()