masnick/compare_2_sirs.py

## compare_2_sirs.py
"""
Compare 2 SIRs by exact binomial and mid-p
==========================================

This is a conversion of SAS code written by Minn Soe and published by the CDC.
The original SAS code is available here: http://www.cdc.gov/nhsn/sas/binom.sas

Author: Max Masnick, PhD (https://masnick.org/contact)
Last modified: 2016-04-28


Installation
------------

The only dependency is `scipy`, which can be installed with `pip install scipy`.


Running
-------

This file is set up to run a number of tests to verify the accuracy of the code
translation. To do this, run:

    python -m unittest compare_sirs_cdc

To use the `compare_sirs(...)` method in your own code, you'll want to extract
that method and the supporting `BinP_method()` and `BinP2_method()`.

Then simply call `compare_sirs(observed1, expected1, observed2, expected2)`.
This method will return a tuple as described in the comment inside the method
below.


Disclaimer
---------

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

"""
from scipy.stats import binom
import unittest

def BinP_method(x1, x2, PP, N):
    q = PP * 1.0 / (1 - PP)
    k = 0
    v = 1
    s = 0
    tot = 0
    while (k<=N):
        tot += v
        if k>= x1 and k <= x2:
            s += v
        if tot > 10 ** 30:
            s = s * 1.0 / (10**30)
            tot = tot * 1.0 / (10**30)
            v = v * 1.0 / (10**30)
        k += 1
        v = v * 1.0 * q * (N + 1 - k) / k
    return s * 1.0 / tot

def BinP2_method(x1, x2, PP, N):
    return BinP_method(x1, x2, PP, N) * 0.5

def compare_sirs(observed1, expected1, observed2, expected2):
    """
    Based on http://www.cdc.gov/nhsn/sas/binom.sas

    Returns: tuple with the following elements
        1. Two-tailed P value
        2. Ratio of SIRs
        3. Lower 95% CI limit
        4. Upper 95% CI limit
    """

    # Can't run if both observed values are 0
    if observed1 == 0 and observed2 == 0:
        return (None, None, None, None)

    # Can't run if expecteds are < 1
    if expected1 < 1 or expected2 < 1:
        return (None, None, None, None)

    vN = observed1 + observed2
    vP = observed2 * 1.0 / vN

    ratio1 = observed1 * 1.0 / expected1
    ratio2 = observed2 * 1.0 / expected2
    O = observed1 + observed2
    T = expected1 + expected2

    # Take the larger of the two SIRs
    if ratio1 >= ratio2:
        # probbnml(0.5, 10, 4) = 0.37695 -> binom.cdf(4, 10, 0.5) = 0.37695
        #            p   n  x                         x   n    p
        p = expected1 * 1.0 / T
        n = O
        x = observed1
    else:
        p = expected2 * 1.0 / T
        n = O
        x = observed2

    # Testing for Ha: RR>1
    p1 = 1 - binom.cdf(x, n, p)
    p2 = 0.5 * (binom.cdf(x, n, p) - binom.cdf(x-1, n, p))
    p3 = p1 + p2

    midP = 2 * p3
    if midP > 1:
        midP = 1
    if midP < 0:
        midP = 0

    if observed1 != 0: # prevents division by 0 when ratio1=0
        ratio = (observed2 * 1.0 / expected2) / (observed1 * 1.0 / expected1)

        if observed2 == 0:
            dl = 0;
        else:
            p2 = vP * 1.0 / 2
            vsL = 0
            vsH = vP
            p = 2.5/100

            while (vsH - vsL) > (10**-5):
                binP = BinP_method(x1=observed2 + 1, x2=vN, PP=p2, N=vN)
                binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p2, N=vN)
                if binP + binP2 > p:
                    vsH = p2
                    p2 = (vsL + p2) * 1.0 / 2
                else:
                    vsL = p2
                    p2 = (vsH + p2) * 1.0 / 2
            dl = p2

        if observed2 == vN:
            du = 1
        else:
            p3 = (1 + vP) * 1.0 / 2
            vsL = vP
            vsH = 1
            p = 2.5/100
            while (vsH - vsL) > (10**-5):
                binP = BinP_method(x1=0, x2=observed2-1, PP=p3, N=vN)
                binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p3, N=vN)
                if binP + binP2 < p:
                    vsH = p3
                    p3 = (vsL + p3) * 1.0 / 2
                else:
                    vsL = p3
                    p3 = (vsH + p3) * 1.0 / 2
            du = p3

        ll = round((dl * expected1 * 1.0)/((1-dl)*expected2), 3)
        ul = round((du * expected1 * 1.0)/((1-du)*expected2), 3)

        return(midP, ratio, ll, ul)
    return(midP, None, None, None )


class TestSIRCompare(unittest.TestCase):
    def setUp(self):
        self.test_data = [
            # {'o1': 0    , 'e1': 0.079  , 'o2': 1    , 'e2': 0.079  , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': 0    , 'e1': 0.926  , 'o2': 1    , 'e2': 0.926  , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': 0    , 'e1': 0.079  , 'o2': 2    , 'e2': 1      , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': 0    , 'e1': 0.079  , 'o2': 1    , 'e2': 1.043  , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': 1    , 'e1': 1.043  , 'o2': 0    , 'e2': 0.079  , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': None , 'e1': 10     , 'o2': 10   , 'e2': 10     , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': 10   , 'e1': None   , 'o2': 10   , 'e2': 10     , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': 10   , 'e1': 10     , 'o2': None , 'e2': 10     , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            # {'o1': 10   , 'e1': 10     , 'o2': 10   , 'e2': None   , 'ratio': None    , 'ci_lower': None    , 'ci_upper': None    , 'macro_p': None    , } ,
            {'o1': 35   , 'e1': 9.048  , 'o2': 10   , 'e2': 9.048  , 'ratio': 0.28571 , 'ci_lower': 0.13477 , 'ci_upper': 0.5636  , 'macro_p': 0.00016 , } ,
            {'o1': 2    , 'e1': 1.006  , 'o2': 1    , 'e2': 1.006  , 'ratio': 0.5     , 'ci_lower': 0.01695 , 'ci_upper': 6.5736  , 'macro_p': 0.625   , } ,
            {'o1': 3    , 'e1': 4.649  , 'o2': 2    , 'e2': 4.649  , 'ratio': 0.66667 , 'ci_lower': 0.07928 , 'ci_upper': 4.4831  , 'macro_p': 0.6875  , } ,
            {'o1': 5    , 'e1': 4.202  , 'o2': 4    , 'e2': 4.202  , 'ratio': 0.8     , 'ci_lower': 0.19114 , 'ci_upper': 3.1593  , 'macro_p': 0.75391 , } ,
            {'o1': 20   , 'e1': 13.469 , 'o2': 16   , 'e2': 13.469 , 'ratio': 0.8     , 'ci_lower': 0.40762 , 'ci_upper': 1.5506  , 'macro_p': 0.51138 , } ,
            {'o1': 9    , 'e1': 20.59  , 'o2': 8    , 'e2': 20.59  , 'ratio': 0.88889 , 'ci_lower': 0.32947 , 'ci_upper': 2.3631  , 'macro_p': 0.81453 , } ,
            {'o1': 18   , 'e1': 11.262 , 'o2': 16   , 'e2': 11.262 , 'ratio': 0.88889 , 'ci_lower': 0.44658 , 'ci_upper': 1.757   , 'macro_p': 0.73588 , } ,
            {'o1': 1    , 'e1': 3.168  , 'o2': 1    , 'e2': 3.168  , 'ratio': 1       , 'ci_lower': 0.02564 , 'ci_upper': 39.0037 , 'macro_p': 1       , } ,
            {'o1': 1    , 'e1': 2.59   , 'o2': 2    , 'e2': 2.59   , 'ratio': 2       , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625   , } ,
            {'o1': 1    , 'e1': 1.527  , 'o2': 2    , 'e2': 1.527  , 'ratio': 2       , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625   , } ,
            {'o1': 1    , 'e1': 1.006  , 'o2': 2    , 'e2': 1.006  , 'ratio': 2       , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625   , } ,
            {'o1': 6    , 'e1': 13.47  , 'o2': 7    , 'e2': 6.79   , 'ratio': 2.31443 , 'ci_lower': 0.74998 , 'ci_upper': 7.3312  , 'macro_p': 0.14197 , } ,
            {'o1': 10   , 'e1': 9.048  , 'o2': 35   , 'e2': 9.048  , 'ratio': 3.5     , 'ci_lower': 1.77426 , 'ci_upper': 7.42    , 'macro_p': 0.00016 , }
        ]

    def test_all(self):
        for d in self.test_data:
            # Data to test
            t = [v for v in compare_sirs(d['o1'], d['e1'], d['o2'], d['e2'])]

            # Reference data
            a = [d['macro_p'], d['ratio'], d['ci_lower'], d['ci_upper']]

            # Fix some rounding things
            if t[0] is not None:
                t[0] = round(t[0], 5)
            if t[1] is not None:
                t[1] = round(t[1], 5)
            if a[2] is not None:
                a[2] = round(a[2], 3)
            if a[3] is not None:
                a[3] = round(a[3], 3)

            self.assertEqual(t, a)


if __name__ == '__main__':
    unittest.main()
	"""
	Compare 2 SIRs by exact binomial and mid-p
	==========================================

	This is a conversion of SAS code written by Minn Soe and published by the CDC.
	The original SAS code is available here: http://www.cdc.gov/nhsn/sas/binom.sas

	Author: Max Masnick, PhD (https://masnick.org/contact)
	Last modified: 2016-04-28


	Installation
	------------

	The only dependency is `scipy`, which can be installed with `pip install scipy`.


	Running
	-------

	This file is set up to run a number of tests to verify the accuracy of the code
	translation. To do this, run:

	python -m unittest compare_sirs_cdc

	To use the `compare_sirs(...)` method in your own code, you'll want to extract
	that method and the supporting `BinP_method()` and `BinP2_method()`.

	Then simply call `compare_sirs(observed1, expected1, observed2, expected2)`.
	This method will return a tuple as described in the comment inside the method
	below.


	Disclaimer
	---------

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	THE SOFTWARE.

	"""
	from scipy.stats import binom
	import unittest

	def BinP_method(x1, x2, PP, N):
	q = PP * 1.0 / (1 - PP)
	k = 0
	v = 1
	s = 0
	tot = 0
	while (k<=N):
	tot += v
	if k>= x1 and k <= x2:
	s += v
	if tot > 10 ** 30:
	s = s * 1.0 / (10**30)
	tot = tot * 1.0 / (10**30)
	v = v * 1.0 / (10**30)
	k += 1
	v = v * 1.0 * q * (N + 1 - k) / k
	return s * 1.0 / tot

	def BinP2_method(x1, x2, PP, N):
	return BinP_method(x1, x2, PP, N) * 0.5

	def compare_sirs(observed1, expected1, observed2, expected2):
	"""
	Based on http://www.cdc.gov/nhsn/sas/binom.sas

	Returns: tuple with the following elements
	1. Two-tailed P value
	2. Ratio of SIRs
	3. Lower 95% CI limit
	4. Upper 95% CI limit
	"""

	# Can't run if both observed values are 0
	if observed1 == 0 and observed2 == 0:
	return (None, None, None, None)

	# Can't run if expecteds are < 1
	if expected1 < 1 or expected2 < 1:
	return (None, None, None, None)

	vN = observed1 + observed2
	vP = observed2 * 1.0 / vN

	ratio1 = observed1 * 1.0 / expected1
	ratio2 = observed2 * 1.0 / expected2
	O = observed1 + observed2
	T = expected1 + expected2

	# Take the larger of the two SIRs
	if ratio1 >= ratio2:
	# probbnml(0.5, 10, 4) = 0.37695 -> binom.cdf(4, 10, 0.5) = 0.37695
	# p n x x n p
	p = expected1 * 1.0 / T
	n = O
	x = observed1
	else:
	p = expected2 * 1.0 / T
	n = O
	x = observed2

	# Testing for Ha: RR>1
	p1 = 1 - binom.cdf(x, n, p)
	p2 = 0.5 * (binom.cdf(x, n, p) - binom.cdf(x-1, n, p))
	p3 = p1 + p2

	midP = 2 * p3
	if midP > 1:
	midP = 1
	if midP < 0:
	midP = 0

	if observed1 != 0: # prevents division by 0 when ratio1=0
	ratio = (observed2 * 1.0 / expected2) / (observed1 * 1.0 / expected1)

	if observed2 == 0:
	dl = 0;
	else:
	p2 = vP * 1.0 / 2
	vsL = 0
	vsH = vP
	p = 2.5/100

	while (vsH - vsL) > (10**-5):
	binP = BinP_method(x1=observed2 + 1, x2=vN, PP=p2, N=vN)
	binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p2, N=vN)
	if binP + binP2 > p:
	vsH = p2
	p2 = (vsL + p2) * 1.0 / 2
	else:
	vsL = p2
	p2 = (vsH + p2) * 1.0 / 2
	dl = p2

	if observed2 == vN:
	du = 1
	else:
	p3 = (1 + vP) * 1.0 / 2
	vsL = vP
	vsH = 1
	p = 2.5/100
	while (vsH - vsL) > (10**-5):
	binP = BinP_method(x1=0, x2=observed2-1, PP=p3, N=vN)
	binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p3, N=vN)
	if binP + binP2 < p:
	vsH = p3
	p3 = (vsL + p3) * 1.0 / 2
	else:
	vsL = p3
	p3 = (vsH + p3) * 1.0 / 2
	du = p3

	ll = round((dl * expected1 * 1.0)/((1-dl)*expected2), 3)
	ul = round((du * expected1 * 1.0)/((1-du)*expected2), 3)

	return(midP, ratio, ll, ul)
	return(midP, None, None, None )


	class TestSIRCompare(unittest.TestCase):
	def setUp(self):
	self.test_data = [
	# {'o1': 0 , 'e1': 0.079 , 'o2': 1 , 'e2': 0.079 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': 0 , 'e1': 0.926 , 'o2': 1 , 'e2': 0.926 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': 0 , 'e1': 0.079 , 'o2': 2 , 'e2': 1 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': 0 , 'e1': 0.079 , 'o2': 1 , 'e2': 1.043 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': 1 , 'e1': 1.043 , 'o2': 0 , 'e2': 0.079 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': None , 'e1': 10 , 'o2': 10 , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': 10 , 'e1': None , 'o2': 10 , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': 10 , 'e1': 10 , 'o2': None , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	# {'o1': 10 , 'e1': 10 , 'o2': 10 , 'e2': None , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
	{'o1': 35 , 'e1': 9.048 , 'o2': 10 , 'e2': 9.048 , 'ratio': 0.28571 , 'ci_lower': 0.13477 , 'ci_upper': 0.5636 , 'macro_p': 0.00016 , } ,
	{'o1': 2 , 'e1': 1.006 , 'o2': 1 , 'e2': 1.006 , 'ratio': 0.5 , 'ci_lower': 0.01695 , 'ci_upper': 6.5736 , 'macro_p': 0.625 , } ,
	{'o1': 3 , 'e1': 4.649 , 'o2': 2 , 'e2': 4.649 , 'ratio': 0.66667 , 'ci_lower': 0.07928 , 'ci_upper': 4.4831 , 'macro_p': 0.6875 , } ,
	{'o1': 5 , 'e1': 4.202 , 'o2': 4 , 'e2': 4.202 , 'ratio': 0.8 , 'ci_lower': 0.19114 , 'ci_upper': 3.1593 , 'macro_p': 0.75391 , } ,
	{'o1': 20 , 'e1': 13.469 , 'o2': 16 , 'e2': 13.469 , 'ratio': 0.8 , 'ci_lower': 0.40762 , 'ci_upper': 1.5506 , 'macro_p': 0.51138 , } ,
	{'o1': 9 , 'e1': 20.59 , 'o2': 8 , 'e2': 20.59 , 'ratio': 0.88889 , 'ci_lower': 0.32947 , 'ci_upper': 2.3631 , 'macro_p': 0.81453 , } ,
	{'o1': 18 , 'e1': 11.262 , 'o2': 16 , 'e2': 11.262 , 'ratio': 0.88889 , 'ci_lower': 0.44658 , 'ci_upper': 1.757 , 'macro_p': 0.73588 , } ,
	{'o1': 1 , 'e1': 3.168 , 'o2': 1 , 'e2': 3.168 , 'ratio': 1 , 'ci_lower': 0.02564 , 'ci_upper': 39.0037 , 'macro_p': 1 , } ,
	{'o1': 1 , 'e1': 2.59 , 'o2': 2 , 'e2': 2.59 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } ,
	{'o1': 1 , 'e1': 1.527 , 'o2': 2 , 'e2': 1.527 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } ,
	{'o1': 1 , 'e1': 1.006 , 'o2': 2 , 'e2': 1.006 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } ,
	{'o1': 6 , 'e1': 13.47 , 'o2': 7 , 'e2': 6.79 , 'ratio': 2.31443 , 'ci_lower': 0.74998 , 'ci_upper': 7.3312 , 'macro_p': 0.14197 , } ,
	{'o1': 10 , 'e1': 9.048 , 'o2': 35 , 'e2': 9.048 , 'ratio': 3.5 , 'ci_lower': 1.77426 , 'ci_upper': 7.42 , 'macro_p': 0.00016 , }
	]

	def test_all(self):
	for d in self.test_data:
	# Data to test
	t = [v for v in compare_sirs(d['o1'], d['e1'], d['o2'], d['e2'])]

	# Reference data
	a = [d['macro_p'], d['ratio'], d['ci_lower'], d['ci_upper']]

	# Fix some rounding things
	if t[0] is not None:
	t[0] = round(t[0], 5)
	if t[1] is not None:
	t[1] = round(t[1], 5)
	if a[2] is not None:
	a[2] = round(a[2], 3)
	if a[3] is not None:
	a[3] = round(a[3], 3)

	self.assertEqual(t, a)


	if __name__ == '__main__':
	unittest.main()