Skip to content

Instantly share code, notes, and snippets.

@phewson
Last active August 29, 2015 14:08
Show Gist options
  • Save phewson/94e969270f2ce7b38a05 to your computer and use it in GitHub Desktop.
Save phewson/94e969270f2ce7b38a05 to your computer and use it in GitHub Desktop.
Version 0.1 Run a two sample t-test from Shiny
library(shiny)
shinyServer(function(input, output) {
Data <- reactive({
# output$contents <- renderTable({
# input$file1 will be NULL initially. After the user selects
# and uploads a file, it will be a data frame with 'name',
# 'size', 'type', and 'datapath' columns. The 'datapath'
# column will contain the local filenames where the data can
# be found.
inFile <- input$file1
if (is.null(inFile))
return(NULL)
read.csv(inFile$datapath, header=input$header, sep=input$sep,
quote=input$quote)
})
output$contents <- renderTable({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
my.df})
output$plot <- renderPlot({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
boxplot(my.df[,1]~my.df[,2], xlab = names(my.df)[1], ylab = names(my.df)[1],
main = paste("Study on ", names(my.df)[2]), col = "hotpink")})
output$logplot <- renderPlot({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
if (min(my.df[,1]) <= 0 ){return(NULL)}
boxplot(log(my.df[,1])~my.df[,2], xlab = names(my.df)[1], ylab = names(my.df)[1],
main = paste("Study on ", names(my.df)[2]), col = "brown")})
ttestout <- reactive({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
t.test(my.df[,1]~my.df[,2], var.equal = TRUE)
})
ttestoutlog <- reactive({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
if (min(my.df[,1]) <= 0 ){return(NULL)}
t.test(log(my.df[,1])~my.df[,2], var.equal = TRUE)
})
output$tvalue <- renderPrint({
vals <- ttestout()
if (is.null(vals)){return(invisible())}
vals$statistic
})
output$tvaluelog <- renderPrint({
vals <- ttestoutlog()
if (is.null(vals)){return(invisible())}
vals$statistic
})
output$samplemeans1 <- renderPrint({
vals <- ttestout()
if (is.null(vals)){return(invisible())}
vals$estimate[1]
})
output$samplemeans1log <- renderPrint({
vals <- ttestoutlog()
if (is.null(vals)){return(invisible())}
vals$estimate[1]
})
output$samplemeans2 <- renderPrint({
vals <- ttestout()
if (is.null(vals)){return(invisible())}
vals$estimate[2]
})
output$samplemeans2log <- renderPrint({
vals <- ttestoutlog()
if (is.null(vals)){return(invisible())}
vals$estimate[2]
})
output$pvalue <- renderPrint({
vals <- ttestout()
if (is.null(vals)){return(invisible())}
vals$p.value
})
output$pvaluelog <- renderPrint({
vals <- ttestoutlog()
if (is.null(vals)){return(invisible())}
vals$p.value
})
output$fivenum <- renderTable({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
quantiles <- tapply(my.df[,1], my.df[,2], quantile, c(0.25, 0.5, 0.975))
mins <- tapply(my.df[,1], my.df[,2], min)
maxs <- tapply(my.df[,1], my.df[,2], max)
fivenum <- data.frame(min = c(mins[1],mins[2] ), lower=c(unlist(quantiles[1])[1],unlist(quantiles[2])[1] ),
median=c(unlist(quantiles[1])[2],unlist(quantiles[2])[2] ), upper=c(unlist(quantiles[1])[3],unlist(quantiles[2])[3] ),
max = c(maxs[1],maxs[2] ))
return(fivenum)
})
output$fivenumlog <- renderTable({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
if (min(my.df[,1]) <= 0 ){return(NULL)}
quantiles <- tapply(log(my.df[,1]), my.df[,2], quantile, c(0.25, 0.5, 0.975))
mins <- tapply(log(my.df[,1]), my.df[,2], min)
maxs <- tapply(log(my.df[,1]), my.df[,2], max)
fivenum <- data.frame(min = c(mins[1],mins[2] ), lower=c(unlist(quantiles[1])[1],unlist(quantiles[2])[1] ),
median=c(unlist(quantiles[1])[2],unlist(quantiles[2])[2] ), upper=c(unlist(quantiles[1])[3],unlist(quantiles[2])[3] ),
max = c(maxs[1],maxs[2] ))
return(fivenum)
})
output$parametric <- renderTable({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
means <- tapply(my.df[,1], my.df[,2], mean)
sds <- tapply(my.df[,1], my.df[,2], sd)
ses<- tapply(my.df[,1], my.df[,2], function(x) sd(x)/sqrt(length(x)))
parametric <- data.frame(mean = c(means[1],means[2] ), sd=c(sds[1], sds[2]), se=c(ses[1], ses[2]))
return(parametric)
})
output$parametriclog <- renderTable({
my.df <- Data()
if (is.null(my.df)){return(NULL)}
if (min(my.df[,1]) <= 0 ){return(NULL)}
means <- tapply(log(my.df[,1]), my.df[,2], mean)
sds <- tapply(log(my.df[,1]), my.df[,2], sd)
ses<- tapply(log(my.df[,1]), my.df[,2], function(x) sd(x)/sqrt(length(x)))
parametric <- data.frame(mean = c(means[1],means[2] ), sd=c(sds[1], sds[2]), se=c(ses[1], ses[2]))
return(parametric)
})
})
library(shiny)
shinyUI(fluidPage(
titlePanel("Two sample t-test"),
sidebarLayout(
sidebarPanel(
fileInput('file1', 'Choose CSV File',
accept=c('text/csv',
'text/comma-separated-values,text/plain',
'.csv')),
tags$hr(),
checkboxInput('header', 'Header', TRUE),
radioButtons('sep', 'Separator',
c(Comma=',',
Semicolon=';',
Tab='\t'),
','),
radioButtons('quote', 'Quote',
c(None='',
'Double Quote'='"',
'Single Quote'="'"),
'"'),
selectInput(
"logs", "Log transform",
c(None = "no",
Logs = "yes"))
),
mainPanel(
tabsetPanel(
tabPanel("Data loading",
h1("This displays a table of your data (to check for errors)"),
tableOutput('contents')),
tabPanel("Data checking",
h1("This displays a box plot of your data"),
plotOutput('plot'),
h2("Five number summary"),
p("The min, lower quartile, median, upper quartile and max for group 1 are:"),
tableOutput('fivenum') ,
conditionalPanel(condition = "input.logs == 'yes'",
h1("This displays a box plot of your log transformed data"),
plotOutput('logplot'),
h2("Five number summary for log transformed data"),
p("The min, lower quartile, median, upper quartile and max for the logged data are:"),
tableOutput('fivenumlog'))
),
tabPanel("Parametric inference",
h2("Key summary statistics"),
p("The observed sample statistics were:"),
tableOutput('parametric'),
h2("Details on the t-test"),
p("We are testing the null hypothesis that in the population both means are the same"),
p("The observed test statistic (difference in means divided by pooled standard error:"),
textOutput('tvalue'),
p("The probability of getting t value this big or bigger (positive or negative) if the null hypothesis were true is"),
textOutput('pvalue'),
conditionalPanel(condition = "input.logs == 'yes'",
h2("Log data (if it exists)"),
p("On the log scale, the observed sample statistics were:"),
tableOutput('parametriclog'),
h2("Details on the t-test for logged data"),
p("We are testing the null hypothesis that in the population both logged means are the same"),
p("The observed test statistic (difference in logged means divided by pooled standard error:"),
textOutput('tvaluelog'),
p("The probability of getting t value this big or bigger (positive or negative) if the null hypothesis were true is"),
textOutput('pvaluelog')))
)
))))
@phewson
Copy link
Author

phewson commented Nov 8, 2014

This is a first (very early) of a function that will

  1. Upload a csv file
  2. Summarise and visualise
  3. Do a two sample t test.

Things to improve

  1. Be a bit more flexible about upload (assumes continuous variable in col 1 and grouping variable in col 2)
  2. Choice of visuals
  3. Plot of t-dist and polygon for two sided test.

@phewson
Copy link
Author

phewson commented Nov 11, 2014

This is a small improvement on the summary information given out. I think I want this as a tabbed web app (i.e. upload data then flip tabs)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment