Nick Brown sTeamTraen

## many_corr.R
# Compare multiple correlations to see if they are similar.
# See http://home.ubalt.edu/ntsbarsh/business-stat/opre504.htm#rmulticorr
# By Nick Brown (nicholasjlbrown@gmail.com), September 2022. Licence: CC-0.

r_to_z <- function (r) {
  (log(1 + r) - log(1 - r)) / 2
}

many_corr <- function (vec.z, vec.n) {
  tot1 <- 0

## chi-square-null.R
# Demonstration of chi-square distribution from chi-square tests under the null
# By Nick Brown, February 2022.
# Licence: CC-0

set.seed(2)

df.range <- 1:6      # range of DFs for which to generate plots
loops <- 10000       # number of cases to generate (more gives better precision)
N <- 5000            # range of numbers in tables

## Matta - eTable7.R
# Matta et al., 10.1001/jamainternmed.2021.6454

# See Altman, 2011, 10.1136/bmj.d2304 for calculation of z-scores
# See https://en.wikipedia.org/wiki/Fisher%27s_method#Relation_to_Stouffer's_Z-score_method for Stouffer's method

eTable7 <- data.frame(name="", N=0, OR.m6=0.0, CIlo.m6=0.00, CIhi.m6=0.00, OR.m7=0, CIlo.m7=0.00, CIhi.m7=0.00)
eTable7 <- rbind(eTable7, c("Sleep problems", 54, 0.76, 0.42, 1.36, 0.92, 0.51, 1.67))
eTable7 <- rbind(eTable7, c("Joint pain", 35, 1.38, 0.69, 2.73, 1.40, 0.64, 3.03))
eTable7 <- rbind(eTable7, c("Back pain", 38, 1.58, 0.80, 3.09, 1.24, 0.60, 2.55))
eTable7 <- rbind(eTable7, c("Digestive track problems", 29, 0.61, 0.27, 1.35, 1.16, 0.50, 2.49))

## Matta-T2.R
# Matta et al., 10.1001/jamainternmed.2021.6454

# Table 2

n.s0b0 <- 25271
n.s0b1 <- 461
n.s1b0 <- 638
n.s1b1 <- 453

symptoms <- list(

## Cadegiani-Table1s.R
# Code to analyse Table 1 from some articles about the use of anti-androgens to treat Covid-19.
# By Nick Brown, October 2021.
# Licence: CC-0.
# See https://steamtraen.blogspot.com/2021/10/applying-john-carlisles-table-1.html

# Compute the chi-square p value for a given analysis modality (see the blog post).
mychi <- function (s1, s2, n1, n2, analysis)
{
  mat <- matrix(c(s1, (n1 - s1), s2, (n2 - s2)), ncol=2)

## random-correlation.R
# Simple demonstration of mean absolute correlation with random data.
# By Nick Brown, February 2020. License: CC-0.

library(ggplot2)

set.seed(1)
iter <- 1000

# Create data frame to contain correlation results, excluding CI (which is a list)
ct.names <- names(cor.test(1:3, 3:1))    # little hack: correlation is perfect, so no CI

## Laird-VitD.R
# Reanalysis of Laird et al. (2020) https://pubmed.ncbi.nlm.nih.gov/32603576/
# By Nick Brown, January 2021.
# Licence: CC-0.

# Laird et al. did not state how they went from four (Italy) or three (Spain) measurements
#  of Vitamin D levels to a single figure.
# When I calculate the unweighted mean of their samples, this seems to correspond to the
#  Y-axis values in their figure 1, but I have included the relevant numbers from their
#  Table 1 here to allow a weighted average to be calculated.

## Turkey-lastdigits.R
# Analysis of the last digits of COVID-19 statistics from the Turkish Ministry of Health.
# By Nick Brown, November 2020.
# Licence: CC-0.
#
# The data file is constructed by copy/pasting the table from
#  https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
# into a plain text file
# An archived copy of that page is at
#  https://web.archive.org/web/20201126134229/https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html)
# If you want to use my data file, it's at http://nick.brown.free.fr/stuff/Turkey-webdata.txt,

## Reynolds2020.R
# Reanalysis of Reynolds et al. (2020) 10.1073/pnas.2014403117
# By Nick Brown, November 2020
# Licence: CC-0
# Dataset file available for download from https://www.pnas.org/content/suppl/2020/11/11/2014403117.DCSupplemental

df <- read.csv("pnas.2014403117.sd01.csv", stringsAsFactors=FALSE)

# Remove cases with missing data on the two outcome measures
df.crp <- df[!is.na(df$elevatedCRP),]
df.ht <- df[!is.na(df$htn),]

## gist:39e2abd53ca280153f4b0926b1aae7f6
# This code simulates a fraudulent scientific experiment.
# First, we generate random data in two conditions with no real difference between the conditions.
# Then, we take the k subjects from each condition whose results are the "worst" for our fraudulent hypothesis,
#  and swap them over.
# For example, let's say that the first group is a drug, and the second is a placebo, and the drug doesn't work.
# The outcome value is how long it takes people to recover from the disease.
# We want that number to be as low as possible for our drug and as high as possible for the placebo.
# So we identify the k (3, 5, whatever) people with the longest recovery time in the drug group, and move them to the placebo group.
# The we move the k people with the shortest recovery time in the placebo group, and move them to the drug group.
# What we find is that with 50 people in each group, swapping 3/5 people gives a p<.05 result 66%/96% of the time.
	# Compare multiple correlations to see if they are similar.
	# See http://home.ubalt.edu/ntsbarsh/business-stat/opre504.htm#rmulticorr
	# By Nick Brown (nicholasjlbrown@gmail.com), September 2022. Licence: CC-0.

	r_to_z <- function (r) {
	(log(1 + r) - log(1 - r)) / 2
	}

	many_corr <- function (vec.z, vec.n) {
	tot1 <- 0
	# Demonstration of chi-square distribution from chi-square tests under the null
	# By Nick Brown, February 2022.
	# Licence: CC-0

	set.seed(2)

	df.range <- 1:6 # range of DFs for which to generate plots
	loops <- 10000 # number of cases to generate (more gives better precision)
	N <- 5000 # range of numbers in tables
	# Matta et al., 10.1001/jamainternmed.2021.6454

	# See Altman, 2011, 10.1136/bmj.d2304 for calculation of z-scores
	# See https://en.wikipedia.org/wiki/Fisher%27s_method#Relation_to_Stouffer's_Z-score_method for Stouffer's method

	eTable7 <- data.frame(name="", N=0, OR.m6=0.0, CIlo.m6=0.00, CIhi.m6=0.00, OR.m7=0, CIlo.m7=0.00, CIhi.m7=0.00)
	eTable7 <- rbind(eTable7, c("Sleep problems", 54, 0.76, 0.42, 1.36, 0.92, 0.51, 1.67))
	eTable7 <- rbind(eTable7, c("Joint pain", 35, 1.38, 0.69, 2.73, 1.40, 0.64, 3.03))
	eTable7 <- rbind(eTable7, c("Back pain", 38, 1.58, 0.80, 3.09, 1.24, 0.60, 2.55))
	eTable7 <- rbind(eTable7, c("Digestive track problems", 29, 0.61, 0.27, 1.35, 1.16, 0.50, 2.49))
	# Matta et al., 10.1001/jamainternmed.2021.6454

	# Table 2

	n.s0b0 <- 25271
	n.s0b1 <- 461
	n.s1b0 <- 638
	n.s1b1 <- 453

	symptoms <- list(
	# Code to analyse Table 1 from some articles about the use of anti-androgens to treat Covid-19.
	# By Nick Brown, October 2021.
	# Licence: CC-0.
	# See https://steamtraen.blogspot.com/2021/10/applying-john-carlisles-table-1.html

	# Compute the chi-square p value for a given analysis modality (see the blog post).
	mychi <- function (s1, s2, n1, n2, analysis)
	{
	mat <- matrix(c(s1, (n1 - s1), s2, (n2 - s2)), ncol=2)
	# Simple demonstration of mean absolute correlation with random data.
	# By Nick Brown, February 2020. License: CC-0.

	library(ggplot2)

	set.seed(1)
	iter <- 1000

	# Create data frame to contain correlation results, excluding CI (which is a list)
	ct.names <- names(cor.test(1:3, 3:1)) # little hack: correlation is perfect, so no CI
	# Reanalysis of Laird et al. (2020) https://pubmed.ncbi.nlm.nih.gov/32603576/
	# By Nick Brown, January 2021.
	# Licence: CC-0.

	# Laird et al. did not state how they went from four (Italy) or three (Spain) measurements
	# of Vitamin D levels to a single figure.
	# When I calculate the unweighted mean of their samples, this seems to correspond to the
	# Y-axis values in their figure 1, but I have included the relevant numbers from their
	# Table 1 here to allow a weighted average to be calculated.
	# Analysis of the last digits of COVID-19 statistics from the Turkish Ministry of Health.
	# By Nick Brown, November 2020.
	# Licence: CC-0.
	#
	# The data file is constructed by copy/pasting the table from
	# https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
	# into a plain text file
	# An archived copy of that page is at
	# https://web.archive.org/web/20201126134229/https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html)
	# If you want to use my data file, it's at http://nick.brown.free.fr/stuff/Turkey-webdata.txt,
	# Reanalysis of Reynolds et al. (2020) 10.1073/pnas.2014403117
	# By Nick Brown, November 2020
	# Licence: CC-0
	# Dataset file available for download from https://www.pnas.org/content/suppl/2020/11/11/2014403117.DCSupplemental

	df <- read.csv("pnas.2014403117.sd01.csv", stringsAsFactors=FALSE)

	# Remove cases with missing data on the two outcome measures
	df.crp <- df[!is.na(df$elevatedCRP),]
	df.ht <- df[!is.na(df$htn),]
	# This code simulates a fraudulent scientific experiment.
	# First, we generate random data in two conditions with no real difference between the conditions.
	# Then, we take the k subjects from each condition whose results are the "worst" for our fraudulent hypothesis,
	# and swap them over.
	# For example, let's say that the first group is a drug, and the second is a placebo, and the drug doesn't work.
	# The outcome value is how long it takes people to recover from the disease.
	# We want that number to be as low as possible for our drug and as high as possible for the placebo.
	# So we identify the k (3, 5, whatever) people with the longest recovery time in the drug group, and move them to the placebo group.
	# The we move the k people with the shortest recovery time in the placebo group, and move them to the drug group.
	# What we find is that with 50 people in each group, swapping 3/5 people gives a p<.05 result 66%/96% of the time.