Hause Lin hauselin

## MTurk update workerlist.R
  ############################################################################
  #                                                                          #
  #  Excluding participants who participated in previous studies             #
  #  By Arnoud Plantinga, based on Gabriele Paolacci's Excel solution        #
  #                                                                          #
  #  Instructions (Note: edit only the non-indented lines):                  #
  #                                                                          #
  #  1. Create a qualification (e.g., "Study 1"; keep in mind that the name  #
  #  will be visible to Workers) in MTurk/Manage/Qualification Types         #
  #                                                                          #

## Detect Outliers
#for tutorial, see https://rpubs.com/hauselin/outliersDetect
#clean vector using Z-score cut-off method; return a vector with outliers = replace (default is NA)
outliersZ <- function(data, zCutOff = 1.96, replace = NA, values = FALSE, digits = 2) {
    #compute standard deviation (population version: divides by n, the number of non-missing values, not n - 1)
    stdev <- sqrt(sum((data - mean(data, na.rm = T))^2, na.rm = T) / sum(!is.na(data)))
    #compute absolute Z values for each value
    absZ <- abs(data - mean(data, na.rm = T)) / stdev
    #subset data that has absZ greater than the zCutOff and replace them with replace
    #can also replace with other values (such as max/mean of data)
    data[absZ > zCutOff] <- replace

## gist:5f74ce5251521b72df61
data <- structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
                                2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
                                5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
                                8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
                                11L, 11L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 14L,
                                14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
                                16L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 19L, 19L,
                                19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L,
                                22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L,
                                24L, 24L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 27L,

## logisticRegressionR2s.R
#function to compute R2s in logistic regression
logisticR2s <- function(logisticModel) {
    modelDeviance <- logisticModel$deviance
    nullDeviance <- logisticModel$null.deviance
    n <- length(logisticModel$fitted.values)

    R2HosmerLemeshow <- 1 - modelDeviance / nullDeviance
    R2CoxSnell <- 1 - exp((modelDeviance - nullDeviance) / n)
    R2Nagelkerke <- R2CoxSnell / (1 - exp( - (nullDeviance / n)))


## gist:299fc906e3de49b73cb2
#function to read raw data from Qualtrics
cleanQualtrics <- function(csvFile, rowAsHeader, skipRows) {
    #this function assumes that you have named your
    #Qualtrics questions properly when setting up the survey;
    #if questions are properly named, then the first row
    #will be most informative and suitable for use as column names
    #read.csv defaults to header = T, so header = F is passed to keep every row (including the header rows) as data; stringsAsFactors = F ensures strings aren't converted to factors
    QualtricsRaw <- read.csv(csvFile, header = F, stringsAsFactors = F)
    #row number rowAsHeader (typically row 1) contains the strings that we'd like to use as column names; select that row and turn its values into characters
    colNames <- as.character(QualtricsRaw[rowAsHeader,])

## summarySE2
summarySE2 <- function (data = NULL, measurevar, groupvars = NULL, na.rm = TRUE, conf.interval = 0.95) {
    library(data.table)
    data <- data.table(data)

    # Count the elements of x; when na.rm is TRUE, count only the non-missing ones.
    length2 <- function(x, na.rm = FALSE) {
        if (!na.rm) {
            # missing values are counted like any other element
            return(length(x))
        }
        sum(!is.na(x))
    }


## AtomSettings
Atom Settings

## README.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                hauselin
                / README.md
            
            
              Created
              March 6, 2019 09:46
                — forked from hofmannsven/README.md
            
              
                My simple Git Cheatsheet
              
          
    Using Git

Global Settings


Related Setup: https://gist.github.com/hofmannsven/6814278
Related Pro Tips: https://ochronus.com/git-tips-from-the-trenches/
Interactive Beginners Tutorial: http://try.github.io/
Git Cheatsheet by GitHub: https://services.github.com/on-demand/downloads/github-git-cheat-sheet/


## simulate_attract_personality.py
def simulate(simulations=1000, n=500, quantile=0.80):

    correlations = np.zeros(simulations)

    for i in range(simulations):

        data = {"personality": np.random.randn(
            n), "attract": np.random.randn(n)}
        df = pd.DataFrame(data)


## gist:c21dc322c67549afed56e2ccd6500d50
# Long-format example table: one row per (student, class) pair, 12 rows in total.
df_long = pd.DataFrame(
    data={
        # the four students repeat once per class
        "student": ["Andy", "Bernie", "Cindy", "Deb"] * 3,
        # school alternates Z, Y in step with the student order
        "school": ["Z", "Y"] * 6,
        # four consecutive rows for each class
        "class": ["english"] * 4 + ["math"] * 4 + ["physics"] * 4,
        "grade": [
            10, 100, 1000, 10000,
            20, 200, 2000, 20000,
            30, 300, 3000, 30000,
        ],
    }
)

df_long
>    student school    class  grade
	############################################################################
	# #
	# Excluding participants who participated in previous studies #
	# By Arnoud Plantinga, based on Gabriele Paolacci's Excel solution #
	# #
	# Instructions (Note: edit only the non-indented lines): #
	# #
	# 1. Create a qualification (e.g., "Study 1"; keep in mind that the name #
	# will be visible to Workers) in MTurk/Manage/Qualification Types #
	# #
	#for tutorial, see https://rpubs.com/hauselin/outliersDetect
	#clean vector using Z-score cut-off method; return a vector with outliers = replace (default is NA)
	outliersZ <- function(data, zCutOff = 1.96, replace = NA, values = FALSE, digits = 2) {
	#compute standard deviation (population version: divides by n, the number of non-missing values, not n - 1)
	stdev <- sqrt(sum((data - mean(data, na.rm = T))^2, na.rm = T) / sum(!is.na(data)))
	#compute absolute Z values for each value
	absZ <- abs(data - mean(data, na.rm = T)) / stdev
	#subset data that has absZ greater than the zCutOff and replace them with replace
	#can also replace with other values (such as max/mean of data)
	data[absZ > zCutOff] <- replace
	data <- structure(list(id = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
	2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L,
	5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L,
	8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
	11L, 11L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 14L,
	14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L,
	16L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 19L, 19L,
	19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L,
	22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L,
	24L, 24L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 27L,
	#function to compute R2s in logistic regression
	logisticR2s <- function(logisticModel) {
	modelDeviance <- logisticModel$deviance
	nullDeviance <- logisticModel$null.deviance
	n <- length(logisticModel$fitted.values)

	R2HosmerLemeshow <- 1 - modelDeviance / nullDeviance
	R2CoxSnell <- 1 - exp((modelDeviance - nullDeviance) / n)
	R2Nagelkerke <- R2CoxSnell / (1 - exp( - (nullDeviance / n)))
	#function to read raw data from Qualtrics
	cleanQualtrics <- function(csvFile, rowAsHeader, skipRows) {
	#this function assumes that you have named your
	#Qualtrics questions properly when setting up the survey;
	#if questions are properly named, then the first row
	#will be most informative and suitable for use as column names
	#read.csv defaults to header = T, so header = F is passed to keep every row (including the header rows) as data; stringsAsFactors = F ensures strings aren't converted to factors
	QualtricsRaw <- read.csv(csvFile, header = F, stringsAsFactors = F)
	#row number rowAsHeader (typically row 1) contains the strings that we'd like to use as column names; select that row and turn its values into characters
	colNames <- as.character(QualtricsRaw[rowAsHeader,])
	summarySE2 <- function (data = NULL, measurevar, groupvars = NULL, na.rm = TRUE, conf.interval = 0.95) {
	library(data.table)
	data <- data.table(data)

	# Count the elements of x; when na.rm is TRUE, count only the non-missing ones.
	length2 <- function(x, na.rm = FALSE) {
	    if (!na.rm) {
	        # missing values are counted like any other element
	        return(length(x))
	    }
	    sum(!is.na(x))
	}
	def simulate(simulations=1000, n=500, quantile=0.80):

	correlations = np.zeros(simulations)

	for i in range(simulations):

	data = {"personality": np.random.randn(
	n), "attract": np.random.randn(n)}
	df = pd.DataFrame(data)
	df_long = pd.DataFrame(
	{"student": ["Andy", "Bernie", "Cindy", "Deb", "Andy", "Bernie", "Cindy", "Deb", "Andy", "Bernie", "Cindy", "Deb"],
	"school": ["Z", "Y", "Z", "Y", "Z", "Y", "Z", "Y", "Z", "Y", "Z", "Y"],
	"class": ["english", "english", "english", "english", "math", "math", "math", "math", "physics", "physics", "physics", "physics"],
	"grade": [10, 100, 1000, 10000, 20, 200, 2000, 20000, 30, 300, 3000, 30000]
	}
	)

	df_long
	> student school class grade