Skip to content

Instantly share code, notes, and snippets.

@vankesteren
Created August 3, 2016 12:50
Show Gist options
  • Save vankesteren/4fdb90568e96b323e56f7ec225a0dac4 to your computer and use it in GitHub Desktop.
Save vankesteren/4fdb90568e96b323e56f7ec225a0dac4 to your computer and use it in GitHub Desktop.
# Generates a test dataset for exercising JASP procedures and writes it as CSV
# files into the directory below.
# NOTE(review): this path is specific to the original author's machine;
# setwd() stops with an error when the directory does not exist, so adjust it
# before running the script elsewhere.
setwd("~/2-Werk/JASP/code/EJ-dataset")
# The former `rm(list = ls())` was dropped on purpose: it deleted every object
# in the caller's workspace, and this script assigns each object it creates
# before reading it, so the wipe was not needed for correct output.
# NOTE(review): no RNG seed is set, so every run produces different data.
library(MASS) # provides mvrnorm(), used for the two correlated columns
# Continuous Variables ----------------------------------------------------
# JASP procedures have to be tested against a variety of numeric columns.
# Covariance matrix for two unit-variance normals with correlation 0.68.
s <- matrix(c(1, 0.68, 0.68, 1), ncol = 2)
mvn <- mvrnorm(n = 100, mu = c(0, 0), Sigma = s)
# The columns are generated in exactly this order, so the sequence of random
# draws is fixed by the argument order below.
cont <- data.frame(
  contNormal = rnorm(n = 100),                       # standard normal
  contGamma = rgamma(n = 100, shape = 2),            # gamma, shape 2
  contBinom = rbinom(n = 100, size = 1, prob = 0.4), # Bernoulli, p = 0.4
  contExpon = exp(rnorm(n = 100, sd = 50)),          # exponentiated normal
  contWide = runif(n = 100, min = -9e99, max = 9e99),       # very wide range
  contNarrow = runif(n = 100, min = -1e-99, max = 1e-99),   # very narrow range
  # 95 standard-normal draws plus five fixed outliers, shuffled together
  contOutlier = sample(
    x = c(rnorm(n = 95), 12, -23, 4.5, 5.7, -3.12),
    size = 100
  ),
  contcor1 = mvn[, 1L], # first of the two correlated (0.68) normals
  contcor2 = mvn[, 2L]  # second of the two correlated (0.68) normals
)
# Factors -----------------------------------------------------------------
# Categorical columns with 2, 5 and 50 levels, plus one column that contains
# a single-occurrence level and a level with a very long label.
fac <- data.frame(
  # 50 "m" and 50 "f" in random order. FALSE is spelled out (not `F`) because
  # `F` is an ordinary variable that can be reassigned.
  facGender = factor(sample(rep(c("m", "f"), 50), replace = FALSE)),
  # Alternating "control" / "experimental", 50 rows each
  facExperim = factor(rep(c("control", "experimental"), 50)),
  # Levels 1..5, 20 rows each
  facFive = factor(rep(1:5, 20)),
  # Levels 1..50, 2 rows each
  facFifty = factor(c(1:50, 1:50)),
  # 49 rows each of "f1" and "f2", then one "f3" and one very long label
  facOutlier = factor(c(
    rep(c("f1", "f2"), 49), "f3",
    "totallyridiculoussuperlongfactorname"
  ))
)
# Debug -------------------------------------------------------------------
# Columns that probe edge cases: missing values, NaN, Inf, perfectly
# collinear columns, duplicated columns and a constant column.
# Base vectors for the collinear and the exactly-equal columns
col <- rbeta(100, 23, 12)
eq <- rnorm(100, 10, 2.5) * rgamma(100, 1)
deb <- data.frame(
  # One random lowercase letter per row, drawn with replacement. TRUE is
  # spelled out and named (not a positional `T`) because `T` is an ordinary
  # variable that can be reassigned.
  debString = sample(letters, 100, replace = TRUE),
  # Normal(10, 25) draws with 1, 30, 80 and 99 missing values, shuffled
  debMiss1 = sample(c(rnorm(99, 10, 25), NA)),
  debMiss30 = sample(c(rnorm(70, 10, 25), rep(NA, 30))),
  debMiss80 = sample(c(rnorm(20, 10, 25), rep(NA, 80))),
  debMiss99 = sample(c(rnorm(1, 10, 25), rep(NA, 99))),
  # Bernoulli(0.6) draws with 20 missing values, shuffled
  debBinMiss20 = sample(c(rbinom(80, 1, 0.6), rep(NA, 20))),
  debNaN = rep(NaN, 100),                                 # all NaN
  debNaN10 = sample(c(rnorm(90, 10, 25), rep(NaN, 10))),  # 10 NaN
  debInf = rep(Inf, 100),                                 # all Inf
  # Three perfectly collinear variables (shift and scale of the same vector)
  debCollin1 = col,
  debCollin2 = col + 2,
  debCollin3 = col * 2,
  # Two exactly equal variables
  debEqual1 = eq,
  debEqual2 = eq,
  # The same value repeated 100 times
  debSame = rep(12.3, 100)
)
# Export Datasets ---------------------------------------------------------
# Full dataset: continuous, factor and debug columns side by side.
testData1 <- cbind(cont, fac, deb)
# Single-row variant: only the first row of the full dataset.
testData2 <- testData1[1L, , drop = FALSE]
# Both files are written relative to the current working directory, using
# the default write.csv settings.
write.csv(x = testData1, file = "testData.csv")
write.csv(x = testData2, file = "testDataOneRow.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment