Nick Brown sTeamTraen

## Effect-size-D.R
set.seed(1)

# Number of datasets to generate.
iter <- 1000

# Generate random sample sizes.
# Initially I only put the Nmin parameter there to avoid silly sizes like 0 or 1,
#  but then I found that something interesting happens.
# Try setting Nmean to 10 instead of 50, so that all sample sizes are 20, and watch
#  what happens to the correlation between d and its SE.

## MaghbooliFig1.csv

          
            Num
            X
            Y
            Age
            VitD
            Status

            
              1
              248
              767
              37
              107.59
              A

            
              2
              552
              830
              60
              118.45
              A

            
              3
              870
              524
              84
              65.69
              A

            
              4
              777
              511
              77
              63.45
              A

            
              5
              301
              471
              41
              56.55
              A

            
              6
              420
              464
              50
              55.34
              A

            
              7
              486
              513
              55
              63.79
              A

            
              8
              579
              502
              62
              61.90
              A

            
              9
              632
              484
              66
              58.79
              A

## gist:39e2abd53ca280153f4b0926b1aae7f6
# This code simulates a fraudulent scientific experiment.
# First, we generate random data in two conditions with no real difference between the conditions.
# Then, we take the k subjects from each condition whose results are the "worst" for our fraudulent hypothesis,
#  and swap them over.
# For example, let's say that the first group is a drug, and the second is a placebo, and the drug doesn't work.
# The outcome value is how long it takes people to recover from the disease.
# We want that number to be as low as possible for our drug and as high as possible for the placebo.
# So we identify the k (3, 5, whatever) people with the longest recovery time in the drug group, and move them to the placebo group.
# The we move the k people with the shortest recovery time in the placebo group, and move them to the drug group.
# What we find is that with 50 people in each group, swapping 3/5 people gives a p<.05 result 66%/96% of the time.

## Reynolds2020.R
# Reanalysis of Reynolds et al. (2020) 10.1073/pnas.2014403117
# By Nick Brown, November 2020
# Licence: CC-0
# Dataset file available for download from https://www.pnas.org/content/suppl/2020/11/11/2014403117.DCSupplemental

df <- read.csv("pnas.2014403117.sd01.csv", stringsAsFactors=FALSE)

# Remove cases with missing data on the two outcome measures
df.crp <- df[!is.na(df$elevatedCRP),]
df.ht <- df[!is.na(df$htn),]

## Turkey-lastdigits.R
# Analysis of the last digits of COVID-19 statistics from the Turkish Ministry of Health.
# By Nick Brown, November 2020.
# Licence: CC-0.
#
# The data file is constructed by copy/pasting the table from
#  https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
# into a plain text file
# An archived copy of that page is at
#  https://web.archive.org/web/20201126134229/https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html)
# If you want to use my data file, it's at http://nick.brown.free.fr/stuff/Turkey-webdata.txt,

## Laird-VitD.R
# Reanalysis of Laird et al. (2020) https://pubmed.ncbi.nlm.nih.gov/32603576/
# By Nick Brown, January 2021.
# Licence: CC-0.

# Laird et al. did not state how they went from four (Italy) or three (Spain) measurements
#  of Vitamin D levels to a single figure.
# When I calculate the unweighted mean of their samples, this seems to correspond to the
#  Y-axis values in their figure 1, but I have included the relevant numbers from their
#  Table 1 here to allow a weighted average to be calculated.

## random-correlation.R
# Simple demonstration of mean absolute correlation with random data.
# By Nick Brown, February 2020. License: CC-0.

library(ggplot2)

set.seed(1)
iter <- 1000

# Create data frame to contain correlation results, excluding CI (which is a list)
ct.names <- names(cor.test(1:3, 3:1))    # little hack: correlation is perfect, so no CI

## Cadegiani-Table1s.R
# Code to analyse Table 1 from some articles about the use of anti-androgens to treat Covid-19.
# By Nick Brown, October 2021.
# Licence: CC-0.
# See https://steamtraen.blogspot.com/2021/10/applying-john-carlisles-table-1.html

# Compute the chi-square p value for a given analysis modality (see the blog post).
mychi <- function (s1, s2, n1, n2, analysis)
{
  mat <- matrix(c(s1, (n1 - s1), s2, (n2 - s2)), ncol=2)

## Matta-T2.R
# Matta et al., 10.1001/jamainternmed.2021.6454

# Table 2

n.s0b0 <- 25271
n.s0b1 <- 461
n.s1b0 <- 638
n.s1b1 <- 453

symptoms <- list(

## Matta - eTable7.R
# Matta et al., 10.1001/jamainternmed.2021.6454

# See Altman, 2011, 10.1136/bmj.d2304 for calculation of z-scores
# See https://en.wikipedia.org/wiki/Fisher%27s_method#Relation_to_Stouffer's_Z-score_method for Stouffer's method

eTable7 <- data.frame(name="", N=0, OR.m6=0.0, CIlo.m6=0.00, CIhi.m6=0.00, OR.m7=0, CIlo.m7=0.00, CIhi.m7=0.00)
eTable7 <- rbind(eTable7, c("Sleep problems", 54, 0.76, 0.42, 1.36, 0.92, 0.51, 1.67))
eTable7 <- rbind(eTable7, c("Joint pain", 35, 1.38, 0.69, 2.73, 1.40, 0.64, 3.03))
eTable7 <- rbind(eTable7, c("Back pain", 38, 1.58, 0.80, 3.09, 1.24, 0.60, 2.55))
eTable7 <- rbind(eTable7, c("Digestive track problems", 29, 0.61, 0.27, 1.35, 1.16, 0.50, 2.49))
	set.seed(1)

	# Number of datasets to generate.
	iter <- 1000

	# Generate random sample sizes.
	# Initially I only put the Nmin parameter there to avoid silly sizes like 0 or 1,
	# but then I found that something interesting happens.
	# Try setting Nmean to 10 instead of 50, so that all sample sizes are 20, and watch
	# what happens to the correlation between d and its SE.
Num	X	Y	Age	VitD	Status
1	248	767	37	107.59	A
2	552	830	60	118.45	A
3	870	524	84	65.69	A
4	777	511	77	63.45	A
5	301	471	41	56.55	A
6	420	464	50	55.34	A
7	486	513	55	63.79	A
8	579	502	62	61.90	A
9	632	484	66	58.79	A
	# This code simulates a fraudulent scientific experiment.
	# First, we generate random data in two conditions with no real difference between the conditions.
	# Then, we take the k subjects from each condition whose results are the "worst" for our fraudulent hypothesis,
	# and swap them over.
	# For example, let's say that the first group is a drug, and the second is a placebo, and the drug doesn't work.
	# The outcome value is how long it takes people to recover from the disease.
	# We want that number to be as low as possible for our drug and as high as possible for the placebo.
	# So we identify the k (3, 5, whatever) people with the longest recovery time in the drug group, and move them to the placebo group.
	# The we move the k people with the shortest recovery time in the placebo group, and move them to the drug group.
	# What we find is that with 50 people in each group, swapping 3/5 people gives a p<.05 result 66%/96% of the time.
	# Reanalysis of Reynolds et al. (2020) 10.1073/pnas.2014403117
	# By Nick Brown, November 2020
	# Licence: CC-0
	# Dataset file available for download from https://www.pnas.org/content/suppl/2020/11/11/2014403117.DCSupplemental

	df <- read.csv("pnas.2014403117.sd01.csv", stringsAsFactors=FALSE)

	# Remove cases with missing data on the two outcome measures
	df.crp <- df[!is.na(df$elevatedCRP),]
	df.ht <- df[!is.na(df$htn),]
	# Analysis of the last digits of COVID-19 statistics from the Turkish Ministry of Health.
	# By Nick Brown, November 2020.
	# Licence: CC-0.
	#
	# The data file is constructed by copy/pasting the table from
	# https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
	# into a plain text file
	# An archived copy of that page is at
	# https://web.archive.org/web/20201126134229/https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html)
	# If you want to use my data file, it's at http://nick.brown.free.fr/stuff/Turkey-webdata.txt,
	# Reanalysis of Laird et al. (2020) https://pubmed.ncbi.nlm.nih.gov/32603576/
	# By Nick Brown, January 2021.
	# Licence: CC-0.

	# Laird et al. did not state how they went from four (Italy) or three (Spain) measurements
	# of Vitamin D levels to a single figure.
	# When I calculate the unweighted mean of their samples, this seems to correspond to the
	# Y-axis values in their figure 1, but I have included the relevant numbers from their
	# Table 1 here to allow a weighted average to be calculated.
	# Simple demonstration of mean absolute correlation with random data.
	# By Nick Brown, February 2020. License: CC-0.

	library(ggplot2)

	set.seed(1)
	iter <- 1000

	# Create data frame to contain correlation results, excluding CI (which is a list)
	ct.names <- names(cor.test(1:3, 3:1)) # little hack: correlation is perfect, so no CI
	# Code to analyse Table 1 from some articles about the use of anti-androgens to treat Covid-19.
	# By Nick Brown, October 2021.
	# Licence: CC-0.
	# See https://steamtraen.blogspot.com/2021/10/applying-john-carlisles-table-1.html

	# Compute the chi-square p value for a given analysis modality (see the blog post).
	mychi <- function (s1, s2, n1, n2, analysis)
	{
	mat <- matrix(c(s1, (n1 - s1), s2, (n2 - s2)), ncol=2)
	# Matta et al., 10.1001/jamainternmed.2021.6454

	# Table 2

	n.s0b0 <- 25271
	n.s0b1 <- 461
	n.s1b0 <- 638
	n.s1b1 <- 453

	symptoms <- list(
	# Matta et al., 10.1001/jamainternmed.2021.6454

	# See Altman, 2011, 10.1136/bmj.d2304 for calculation of z-scores
	# See https://en.wikipedia.org/wiki/Fisher%27s_method#Relation_to_Stouffer's_Z-score_method for Stouffer's method

	eTable7 <- data.frame(name="", N=0, OR.m6=0.0, CIlo.m6=0.00, CIhi.m6=0.00, OR.m7=0, CIlo.m7=0.00, CIhi.m7=0.00)
	eTable7 <- rbind(eTable7, c("Sleep problems", 54, 0.76, 0.42, 1.36, 0.92, 0.51, 1.67))
	eTable7 <- rbind(eTable7, c("Joint pain", 35, 1.38, 0.69, 2.73, 1.40, 0.64, 3.03))
	eTable7 <- rbind(eTable7, c("Back pain", 38, 1.58, 0.80, 3.09, 1.24, 0.60, 2.55))
	eTable7 <- rbind(eTable7, c("Digestive track problems", 29, 0.61, 0.27, 1.35, 1.16, 0.50, 2.49))