# sTeamTraen/Turkey-lastdigits.R

## Turkey-lastdigits.R
# Analysis of the last digits of COVID-19 statistics from the Turkish Ministry of Health.
# By Nick Brown, November 2020.
# Licence: CC-0.
#
# The data file is constructed by copy/pasting the table from
#  https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
# into a plain text file.
# An archived copy of that page is at
#  https://web.archive.org/web/20201126134229/https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
# If you want to use my data file, it's at http://nick.brown.free.fr/stuff/Turkey-webdata.txt,
#  but I encourage you to build your own file to check my work.

# Load the copy/pasted table as plain text.
# A dummy decimal separator is passed to read.delim() because the "Pneumonia" field
#  (at least) contains both commas and periods; numeric conversion is done by hand below.
df <- read.delim("Turkey-webdata.txt", dec="#")
oldcols <- ncol(df)    # number of columns before the derived ones are appended

# Periods are used as thousands separators in the table: drop them, then convert to integer.
strip_thousands <- function(x) {
  as.integer(gsub(".", "", x, fixed=TRUE))
}

# Derived numeric columns for the fields of interest
#  (daily new cases, daily tests, daily deaths, and daily recovered patients).
df$Cases <- strip_thousands(df$Number.of.Patients.for.Today)
df$Tests <- strip_thousands(df$Number.of.Tests.for.Today)
df$Deaths <- strip_thousands(df$Number.of.Deaths.for.Today)
df$Recovered <- strip_thousands(df$Number.of.Recovered.Patients.for.Today)

# For each derived column, run a chi-square goodness-of-fit test of the last digits
#  of the daily figures against a uniform distribution over the digits 0-9,
#  and print one tab-separated row (name, statistic, p value) per column.
cat("Item\t\tChi-square\tp\n")
# seq_len() is used instead of (oldcols+1):ncol(df), which would count downwards
#  if no columns had been added after oldcols was recorded.
for (i in oldcols + seq_len(ncol(df) - oldcols)) {    # loop over the columns that we added
  N <- df[[i]]
  N <- N[!is.na(N)]    # ignore values that could not be converted to a number
  # Pin the categories to 0..9 so that a digit that never occurs is still counted
  #  as an empty cell. A plain table() would drop it, and the test would then be run
  #  against a uniform distribution over fewer than ten digits.
  dist <- table(factor(N %% 10, levels=0:9))
  chisq <- chisq.test(dist)

  title <- names(df)[i]
  if (nchar(title) < 8) {
    # Names shorter than 8 characters get an extra tab to keep the columns aligned.
    title <- paste0(title, "\t")
  }
  stat <- sprintf("%7.3f", chisq$statistic)
  p <- sprintf("%-12.7f", chisq$p.value)

  cat(title, "\t", stat, "\t\t", p, "\n", sep="")
}
	# Analysis of the last digits of COVID-19 statistics from the Turkish Ministry of Health.
	# By Nick Brown, November 2020.
	# Licence: CC-0.
	#
	# The data file is constructed by copy/pasting the table from
	# https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
	# into a plain text file.
	# An archived copy of that page is at
	# https://web.archive.org/web/20201126134229/https://covid19.saglik.gov.tr/EN-69532/general-coronavirus-table.html
	# If you want to use my data file, it's at http://nick.brown.free.fr/stuff/Turkey-webdata.txt,
	# but I encourage you to build your own file to check my work.

	# Load the copy/pasted table as plain text.
	# A dummy decimal separator is passed to read.delim() because the "Pneumonia" field
	# (at least) contains both commas and periods; numeric conversion is done by hand below.
	df <- read.delim("Turkey-webdata.txt", dec="#")
	oldcols <- ncol(df) # number of columns before the derived ones are appended

	# Periods are used as thousands separators in the table: drop them, then convert to integer.
	strip_thousands <- function(x) {
	  as.integer(gsub(".", "", x, fixed=TRUE))
	}

	# Derived numeric columns for the fields of interest
	# (daily new cases, daily tests, daily deaths, and daily recovered patients).
	df$Cases <- strip_thousands(df$Number.of.Patients.for.Today)
	df$Tests <- strip_thousands(df$Number.of.Tests.for.Today)
	df$Deaths <- strip_thousands(df$Number.of.Deaths.for.Today)
	df$Recovered <- strip_thousands(df$Number.of.Recovered.Patients.for.Today)

	# For each derived column, run a chi-square goodness-of-fit test of the last digits
	# of the daily figures against a uniform distribution over the digits 0-9,
	# and print one tab-separated row (name, statistic, p value) per column.
	cat("Item\t\tChi-square\tp\n")
	# seq_len() is used instead of (oldcols+1):ncol(df), which would count downwards
	# if no columns had been added after oldcols was recorded.
	for (i in oldcols + seq_len(ncol(df) - oldcols)) { # loop over the columns that we added
	  N <- df[[i]]
	  N <- N[!is.na(N)] # ignore values that could not be converted to a number
	  # Pin the categories to 0..9 so that a digit that never occurs is still counted
	  # as an empty cell. A plain table() would drop it, and the test would then be run
	  # against a uniform distribution over fewer than ten digits.
	  dist <- table(factor(N %% 10, levels=0:9))
	  chisq <- chisq.test(dist)

	  title <- names(df)[i]
	  if (nchar(title) < 8) {
	    # Names shorter than 8 characters get an extra tab to keep the columns aligned.
	    title <- paste0(title, "\t")
	  }
	  stat <- sprintf("%7.3f", chisq$statistic)
	  p <- sprintf("%-12.7f", chisq$p.value)

	  cat(title, "\t", stat, "\t\t", p, "\n", sep="")
	}