Martin Papenberg m-Py

## rndSequence.js
rndSequence = function(length) {
  options = length;
  option_sequence = [];
  for (var i = 0; i < options; i++) {
    if (option_sequence.length === 0) {
        var rnd = Math.floor(Math.random()*options);
        option_sequence.push(rnd);
    }
    else if (option_sequence.length > 0) {
        var rnd = Math.floor(Math.random()*options);

## test_q_distribution.r
# in this program I test the sampling distribution of Cohen's q

library("MASS")

# Simulation settings: the two sample sizes used to generate correlation
# coefficients, and the number of simulation runs.
n1  <- 100
n2  <- 100
sml <- 20000    # number of simulations
# Preallocate one slot per simulation run (presumably filled with the
# simulated q values later on). `sml` is a scalar, so `length(sml)` would be 1
# and yield a length-1 vector; the run count itself must be the length.
qs  <- vector(length = sml)

## wordCountRmdFile.R
# This function reads a Rmd file and returns the word count
# It uses the wordcountaddin and koRpus packages
text_stats_file <- function(rmdFile) {
  rmd <- file(rmdFile, "rt")
  text <- readLines(rmd)
  conText <- ""
  for (i in text) {
    conText <- paste(conText, i)
  }
  close(rmd)

## ordinal_scores.R
## Author Martin Papenberg
## Year 2018

## This code is released into the public domain. Anybody may use, alter
## and distribute the code without restriction. The author makes no
## guarantees, and takes no liability of any kind for use of this code.

#' Compute ordinal scores from continuous data
#'
#' Might be useful for data exploration with highly skewed data

## SIX_OUT_OF_THIRTY.R
## Warning: This code is just for fun / educational purposes; the file contains functions
## to find out how severely the p value in a t-test can be minimized by systematic removal of data points.

## SIX OUT OF THIRTY - Martin's approach
## Based on @juli_tkotz's (https://twitter.com/juli_tkotz/status/1085446224117985281)
## idea that removing from the most extreme values is the best approach.


#' Simulate t-tests and store best p values
#'

## covariate_regression.R


## This document illustrates that type 1 sums of squares lead to increased alpha
## error rates when a predictive covariate is included in the regression model.


# Estimate p-value for treatment (null) effect via linear regression,
# including a covariate that is predictive of the outcome
#
# param N: sample size, default 100

## KNN_RANN.R
# Author: Martin Papenberg
# Year: 2019

# Perform fast KNN classifier using RANN for nearest neighbour search

library("RANN")
library("data.table")

# param data: The numeric data matrix used
# param labels: the labels to predict

## correlated_data.R
## Year 2019 - 2020
## Author: Martin Papenberg

## This code is in the public domain, do with it whatever you like.
# Generate bivariate normal data with specified correlation

# param n: how many data points
# param mx: the mean of the first variable
# param my: the mean of the second variable
# param sdx: the standard deviation of the first variable

## simulate_glm.R

# Show that interaction in glm() changes nature of main effect
# (only if a categorical predictor is dummy coded - not contrast coded)

# Returns the p-value associated with a predictor main effect, once
# with and once without interaction with a (non-predictive) categorical
# independent variable

simulate_glm <- function(N = 100, contrast_coding = FALSE) {
  iv1 <- rnorm(N) # related to DV

## test_anticlust.R
## 1. Install `anticlust` from GitHub and attach it

# `remotes` provides install_github(); fetch it from the configured
# repository when its namespace cannot be loaded.
if (isFALSE(requireNamespace("remotes"))) {
  install.packages("remotes")
}

# Note: this (re-)installs the package from GitHub on every run.
remotes::install_github("m-Py/anticlust")
library("anticlust")
	rndSequence = function(length) {
	options = length;
	option_sequence = [];
	for (var i = 0; i < options; i++) {
	if (option_sequence.length === 0) {
	var rnd = Math.floor(Math.random()*options);
	option_sequence.push(rnd);
	}
	else if (option_sequence.length > 0) {
	var rnd = Math.floor(Math.random()*options);
	# in this program I test the sampling distribution of Cohen's q

	library("MASS")

	# Simulation settings: the two sample sizes used to generate correlation
	# coefficients, and the number of simulation runs.
	n1 <- 100
	n2 <- 100
	sml <- 20000 # number of simulations
	# Preallocate one slot per simulation run (presumably filled with the
	# simulated q values later on). `sml` is a scalar, so `length(sml)` would
	# be 1 and yield a length-1 vector; the run count itself must be the length.
	qs <- vector(length = sml)
	# This function reads a Rmd file and returns the word count
	# It uses the wordcountaddin and koRpus packages
	text_stats_file <- function(rmdFile) {
	rmd <- file(rmdFile, "rt")
	text <- readLines(rmd)
	conText <- ""
	for (i in text) {
	conText <- paste(conText, i)
	}
	close(rmd)
	## Author Martin Papenberg
	## Year 2018

	## This code is released into the public domain. Anybody may use, alter
	## and distribute the code without restriction. The author makes no
	## guarantees, and takes no liability of any kind for use of this code.

	#' Compute ordinal scores from continuous data
	#'
	#' Might be useful for data exploration with highly skewed data
	## Warning: This code is just for fun / educational purposes; the file contains functions
	## to find out how severely the p value in a t-test can be minimized by systematic removal of data points.

	## SIX OUT OF THIRTY - Martin's approach
	## Based on @juli_tkotz's (https://twitter.com/juli_tkotz/status/1085446224117985281)
	## idea that removing from the most extreme values is the best approach.


	#' Simulate t-tests and store best p values
	#'


	## This document illustrates that type 1 sums of squares lead to increased alpha
	## error rates when a predictive covariate is included in the regression model.


	# Estimate p-value for treatment (null) effect via linear regression,
	# including a covariate that is predictive of the outcome
	#
	# param N: sample size, default 100
	# Author: Martin Papenberg
	# Year: 2019

	# Perform fast KNN classifier using RANN for nearest neighbour search

	library("RANN")
	library("data.table")

	# param data: The numeric data matrix used
	# param labels: the labels to predict
	## Year 2019 - 2020
	## Author: Martin Papenberg

	## This code is in the public domain, do with it whatever you like.
	# Generate bivariate normal data with specified correlation

	# param n: how many data points
	# param mx: the mean of the first variable
	# param my: the mean of the second variable
	# param sdx: the standard deviation of the first variable

	# Show that interaction in glm() changes nature of main effect
	# (only if a categorical predictor is dummy coded - not contrast coded)

	# Returns the p-value associated with a predictor main effect, once
	# with and once without interaction with a (non-predictive) categorical
	# independent variable

	simulate_glm <- function(N = 100, contrast_coding = FALSE) {
	iv1 <- rnorm(N) # related to DV
	## 1. Install `anticlust` from GitHub and attach it

	# `remotes` provides install_github(); fetch it from the configured
	# repository when its namespace cannot be loaded.
	if (isFALSE(requireNamespace("remotes"))) {
	install.packages("remotes")
	}

	# Note: this (re-)installs the package from GitHub on every run.
	remotes::install_github("m-Py/anticlust")
	library("anticlust")