hackerb9/ttest.bc

## ttest.bc
/* Student's t-test (Two Independent Samples) */		/* -*- c -*- */
/* Implemented in GNU bc by hackerb9, 2018. */
/* Copyright assigned to FSF. */

/*
 *
 *                              µ₀ - µ₁
 * t =  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 *      ⎡                                                     ⎤ ½
 *      ⎢ (ΣA² - (ΣA)²/n₀) + (ΣB² - (ΣB)²/n₁)  	⎛  1     1  ⎞ ⎥
 *      ⎢ ─────────────────────────────────── •	⎜ ─── + ─── ⎟ ⎥
 *      ⎢             n₀ + n₁ - 2              	⎝  n₀    n₁ ⎠ ⎥
 *      ⎣                                                     ⎦
 *
 *
 */


define ttest(a[], alen, b[], blen) {
  auto rv;
  rv  = sumsq(a[], alen);	 /* Add Sum of squares */
  rv += sumsq(b[], blen);	 /* (ΣA²) +  (ΣB²) */
  rv -= sqsum(a[], alen) /alen;	 /* Subtract square of sums / n */
  rv -= sqsum(b[], blen) /blen;	 /* (ΣA²-(ΣA)²/n₀) + (ΣB²-(ΣB)²/n₁) */
  rv /= (alen + blen - 2);	 /* Divide by n₀ + n₁ - 2 */
  rv *= (1/alen + 1/blen);	 /* Multiply by 1/n₀ + 1/n₁ */
  rv  = sqrt(rv);		 /* Take square root of all that */
  rv  = 1/rv;			 /* Put it on the bottom */
  rv *=  (mean(a[], alen) - mean(b[], blen)); /*  Multiply by µ₀ - µ₁ */
  return rv;
}


define tteststdin() {
  /* A nice front end for ttest() that will interactively prompt you
   * for data, or you may put your data in a file and run it like so:
   *
   * 	bc ttest.bc < datafile
   *
   * This is a handy way to load up arrays A and B in bc and run
   * Student's t-test.
   *
   * Here is an example data file:
   *
   *	-1
   *	3
   *	8
   *	2
   *	-1
   *	38
   *	85
   *	34
   *	-1
   *
   * As you can see, the first number read is used as the delimiter to
   * find the end of each data set. I used -1, but you can use any
   * number you want.
   *
   */

  auto x, t, end; /*, a, b, alen, blen;*/

  print "Number to use as dataset delimiter? ";
  end=read();

  print "\rEnter data now or type ", end, " to end data input\n";
  alen=0; blen=0;
  print "A[", alen, "] = "
  x=read();
  while (x != end) {
    a[alen++] = x;
    print "\rA[", alen, "] = "
    x=read();
  }
  print "\rB[", blen, "] = "
  x=read();
  while (x != end) {
    b[blen++] = x;
    print "\rB[", blen, "] = "
    x=read();
  }
  print "\r"

  t=ttest(a[], alen, b[], blen);
  df=alen+blen-2;
  print "t-test result: \n", t, "\n";
  print "Degrees of freedom: ", df, "\n";
  print "Mean of A: ", trunc(mean(a[], alen)), "\t";
  print "Mean of B: ", trunc(mean(b[], blen)), "\n";
  foo=ttable(df, t);		/* lookup p-value significance */
  return t;
}


/* Helper functions */
define sum(x[], n) {
  /* Return the sum of array x with length n  */
  auto rv;
  while (n--) rv+=x[n];
  return rv;
}
define sumsq(x[], n) {
  /* Return the sum of squares */
  auto rv;
  while (n--) rv+=x[n]^2;
  return rv;
}
define sqsum(x[], n) {
  /* Return the square of the sum  */
  return sum(x[], n)^2;
}
define mean(x[], n) {
  /* Return the average of x */
  return sum(x[],n) / n;
}
define abs(x) {
  /* Return absolute value of x */
  if (x>0) return x;
  return -x;
}
define trunc(x) {
  /* Truncate excess decimal zeros by finding the min scale that holds x. */
  auto os; os=scale; scale=0;
  while (x != x/1)
    scale = scale + 1;
  x /= 1;
  scale=os;
  return x;
}

define ttable(df, t) {
  /* t-table to lookup significance given t, the result of the t-test,
   * and df, the "degrees of freedom" (df = n₀ + n₁ - 2).
   *
   * This table is for a two-tailed hypothesis, with alpha = 0.05
   *  or a one-tailed hypothesis, with alpha = 0.025.
   */

  table[1] =     12.706 ;
  table[2] =     4.303  ;
  table[3] =     3.182  ;
  table[4] =     2.776  ;
  table[5] =     2.571  ;
  table[6] =     2.447  ;
  table[7] =     2.365  ;
  table[8] =     2.306  ;
  table[9] =     2.262  ;
  table[10] =    2.228  ;
  table[11] =    2.201  ;
  table[12] =    2.179  ;
  table[13] =    2.160  ;
  table[14] =    2.145  ;
  table[15] =    2.131  ;
  table[16] =    2.120  ;
  table[17] =    2.110  ;
  table[18] =    2.101  ;
  table[19] =    2.093  ;
  table[20] =    2.086  ;
  table[21] =    2.080  ;
  table[22] =    2.074  ;
  table[23] =    2.069  ;
  table[24] =    2.064  ;
  table[25] =    2.060  ;
  table[26] =    2.056  ;
  table[27] =    2.052  ;
  table[28] =    2.048  ;
  table[29] =    2.045  ;
  table[30] =    2.042  ;
  table[60] =    2.000  ;
  table[120] =   1.980  ;
  table[1000] =  1.960  ;

  a=1.960;			/* table[∞] */
  if (df <=1000) a = table[1000];
  if (df <= 120) a = table[120];
  if (df <=  60) a = table[60];
  if (df <=  30) a = table[df];

  t=abs(t);
  if (t>a) {
    print "Means are significantly different (p=0.05).\n";
    return 1;
  }
  if (t<=a) {
    print "p>0.05, so we cannot conclude there is any difference.\n";
    return 0;
  }
}


scale=5;
print "Running ttest on stdin\n";
tteststdin()

quit
	/* Student's t-test (Two Independent Samples) / / -- c -- */
	/* Implemented in GNU bc by hackerb9, 2018. */
	/* Copyright assigned to FSF. */

	/*
	*
	* µ₀ - µ₁
	* t = ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
	* ⎡ ⎤ ½
	* ⎢ (ΣA² - (ΣA)²/n₀) + (ΣB² - (ΣB)²/n₁) ⎛ 1 1 ⎞ ⎥
	* ⎢ ─────────────────────────────────── • ⎜ ─── + ─── ⎟ ⎥
	* ⎢ n₀ + n₁ - 2 ⎝ n₀ n₁ ⎠ ⎥
	* ⎣ ⎦
	*
	*
	*/


	define ttest(a[], alen, b[], blen) {
	auto rv;
	rv = sumsq(a[], alen); /* Add Sum of squares */
	rv += sumsq(b[], blen); /* (ΣA²) + (ΣB²) */
	rv -= sqsum(a[], alen) /alen; /* Subtract square of sums / n */
	rv -= sqsum(b[], blen) /blen; /* (ΣA²-(ΣA)²/n₀) + (ΣB²-(ΣB)²/n₁) */
	rv /= (alen + blen - 2); /* Divide by n₀ + n₁ - 2 */
	rv = (1/alen + 1/blen); / Multiply by 1/n₀ + 1/n₁ */
	rv = sqrt(rv); /* Take square root of all that */
	rv = 1/rv; /* Put it on the bottom */
	rv = (mean(a[], alen) - mean(b[], blen)); / Multiply by µ₀ - µ₁ */
	return rv;
	}



	define tteststdin() {
	/* A nice front end for ttest() that will interactively prompt you
	* for data, or you may put your data in a file and run it like so:
	*
	* bc ttest.bc < datafile
	*
	* This is a handy way to load up arrays A and B in bc and run
	* Student's t-test.
	*
	* Here is an example data file:
	*
	* -1
	* 3
	* 8
	* 2
	* -1
	* 38
	* 85
	* 34
	* -1
	*
	* As you can see, the first number read is used as the delimiter to
	* find the end of each data set. I used -1, but you can use any
	* number you want.
	*
	*/

	auto x, t, end; /, a, b, alen, blen;/

	print "Number to use as dataset delimiter? ";
	end=read();

	print "\rEnter data now or type ", end, " to end data input\n";
	alen=0; blen=0;
	print "A[", alen, "] = "
	x=read();
	while (x != end) {
	a[alen++] = x;
	print "\rA[", alen, "] = "
	x=read();
	}
	print "\rB[", blen, "] = "
	x=read();
	while (x != end) {
	b[blen++] = x;
	print "\rB[", blen, "] = "
	x=read();
	}
	print "\r"

	t=ttest(a[], alen, b[], blen);
	df=alen+blen-2;
	print "t-test result: \n", t, "\n";
	print "Degrees of freedom: ", df, "\n";
	print "Mean of A: ", trunc(mean(a[], alen)), "\t";
	print "Mean of B: ", trunc(mean(b[], blen)), "\n";
	foo=ttable(df, t); /* lookup p-value significance */
	return t;
	}


	/* Helper functions */
	define sum(x[], n) {
	/* Return the sum of array x with length n */
	auto rv;
	while (n--) rv+=x[n];
	return rv;
	}
	define sumsq(x[], n) {
	/* Return the sum of squares */
	auto rv;
	while (n--) rv+=x[n]^2;
	return rv;
	}
	define sqsum(x[], n) {
	/* Return the square of the sum */
	return sum(x[], n)^2;
	}
	define mean(x[], n) {
	/* Return the average of x */
	return sum(x[],n) / n;
	}
	define abs(x) {
	/* Return absolute value of x */
	if (x>0) return x;
	return -x;
	}
	define trunc(x) {
	/* Truncate excess decimal zeros by finding the min scale that holds x. */
	auto os; os=scale; scale=0;
	while (x != x/1)
	scale = scale + 1;
	x /= 1;
	scale=os;
	return x;
	}

	define ttable(df, t) {
	/* t-table to lookup significance given t, the result of the t-test,
	* and df, the "degrees of freedom" (df = n₀ + n₁ - 2).
	*
	* This table is for a two-tailed hypothesis, with alpha = 0.05
	* or a one-tailed hypothesis, with alpha = 0.025.
	*/

	table[1] = 12.706 ;
	table[2] = 4.303 ;
	table[3] = 3.182 ;
	table[4] = 2.776 ;
	table[5] = 2.571 ;
	table[6] = 2.447 ;
	table[7] = 2.365 ;
	table[8] = 2.306 ;
	table[9] = 2.262 ;
	table[10] = 2.228 ;
	table[11] = 2.201 ;
	table[12] = 2.179 ;
	table[13] = 2.160 ;
	table[14] = 2.145 ;
	table[15] = 2.131 ;
	table[16] = 2.120 ;
	table[17] = 2.110 ;
	table[18] = 2.101 ;
	table[19] = 2.093 ;
	table[20] = 2.086 ;
	table[21] = 2.080 ;
	table[22] = 2.074 ;
	table[23] = 2.069 ;
	table[24] = 2.064 ;
	table[25] = 2.060 ;
	table[26] = 2.056 ;
	table[27] = 2.052 ;
	table[28] = 2.048 ;
	table[29] = 2.045 ;
	table[30] = 2.042 ;
	table[60] = 2.000 ;
	table[120] = 1.980 ;
	table[1000] = 1.960 ;

	a=1.960; /* table[∞] */
	if (df <=1000) a = table[1000];
	if (df <= 120) a = table[120];
	if (df <= 60) a = table[60];
	if (df <= 30) a = table[df];

	t=abs(t);
	if (t>a) {
	print "Means are significantly different (p=0.05).\n";
	return 1;
	}
	if (t<=a) {
	print "p>0.05, so we cannot conclude there is any difference.\n";
	return 0;
	}
	}


	scale=5;
	print "Running ttest on stdin\n";
	tteststdin()

	quit