Skip to content

Instantly share code, notes, and snippets.

View hauselin's full-sized avatar

Hause Lin hauselin

View GitHub Profile
############################################################################
# #
# Excluding participants who participated in previous studies #
# By Arnoud Plantinga, based on Gabriele Paolacci's Excel solution #
# #
# Instructions (Note: edit only the non-indented lines): #
# #
# 1. Create a qualification (e.g., "Study 1"; keep in mind that the name #
# will be visible to Workers) in MTurk/Manage/Qualification Types #
# #
@hauselin
hauselin / Detect Outliers
Last active August 29, 2015 14:23
Two R functions to detect and remove outliers using standard-score or MAD method
#for tutorial, see https://rpubs.com/hauselin/outliersDetect
#clean vector using Z-score cut-off method; return a vector with outliers = replace (default is NA)
outliersZ <- function(data, zCutOff = 1.96, replace = NA, values = FALSE, digits = 2) {
#compute standard deviation (population version: divide by n, the number of non-missing values, not n-1)
stdev <- sqrt(sum((data - mean(data, na.rm = T))^2, na.rm = T) / sum(!is.na(data)))
#compute absolute Z values for each value
absZ <- abs(data - mean(data, na.rm = T)) / stdev
#subset data that has absZ greater than the zCutOff and replace them with replace
#can also replace with other values (such as max/mean of data)
data[absZ > zCutOff] <- replace
data <- structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 14L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
16L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 19L, 19L,
19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L,
22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L,
24L, 24L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 27L,
#function to compute R2s in logistic regression
logisticR2s <- function(logisticModel) {
modelDeviance <- logisticModel$deviance
nullDeviance <- logisticModel$null.deviance
n <- length(logisticModel$fitted.values)
R2HosmerLemeshow <- 1 - modelDeviance / nullDeviance
R2CoxSnell <- 1 - exp((modelDeviance - nullDeviance) / n)
R2Nagelkerke <- R2CoxSnell / (1 - exp( - (nullDeviance / n)))
@hauselin
hauselin / gist:299fc906e3de49b73cb2
Last active May 12, 2016 01:44
R functions for Qualtrics
#function to read raw data from Qualtrics
cleanQualtrics <- function(csvFile, rowAsHeader, skipRows) {
#this function assumes that you have named your
#Qualtrics questions properly when setting up the survey;
#if questions are properly named, then the first row
#will be most informative and suitable for use as column names
#read.csv sets header = TRUE by default, so header = F is passed to keep every row as data; stringsAsFactors is set to FALSE to ensure strings aren't converted to factors
QualtricsRaw <- read.csv(csvFile, header = F, stringsAsFactors = F)
#row rowAsHeader (typically row 1) contains the strings that we'd like to use as column names; select that row and turn its values into characters
colNames <- as.character(QualtricsRaw[rowAsHeader,])
summarySE2 <- function (data = NULL, measurevar, groupvars = NULL, na.rm = TRUE, conf.interval = 0.95) {
library(data.table)
data <- data.table(data)
# Count the elements of x. With na.rm = TRUE only the non-missing
# elements are counted; otherwise the plain length of x is returned.
length2 <- function(x, na.rm = FALSE) {
  if (na.rm) {
    n_present <- sum(!is.na(x))
    return(n_present)
  }
  length(x)
}
Atom Settings
@hauselin
hauselin / README.md
Created March 6, 2019 09:46 — forked from hofmannsven/README.md
My simple Git Cheatsheet
def simulate(simulations=1000, n=500, quantile=0.80):
correlations = np.zeros(simulations)
for i in range(simulations):
data = {"personality": np.random.randn(
n), "attract": np.random.randn(n)}
df = pd.DataFrame(data)
# Example table in long format: 12 rows, one per (student, class) pair.
# Four students (each tied to one school, "Z" or "Y") appear once in each of
# the three classes, and "grade" holds that pair's value.
df_long = pd.DataFrame(
{"student": ["Andy", "Bernie", "Cindy", "Deb", "Andy", "Bernie", "Cindy", "Deb", "Andy", "Bernie", "Cindy", "Deb"],
"school": ["Z", "Y", "Z", "Y", "Z", "Y", "Z", "Y", "Z", "Y", "Z", "Y"],
"class": ["english", "english", "english", "english", "math", "math", "math", "math", "physics", "physics", "physics", "physics"],
"grade": [10, 100, 1000, 10000, 20, 200, 2000, 20000, 30, 300, 3000, 30000]
}
)
df_long
> student school class grade