Skip to content

Instantly share code, notes, and snippets.

@masnick

masnick/compare_2_sirs.py

Last active Apr 28, 2016
Embed
What would you like to do?
Compare 2 SIRs in Python
"""
Compare 2 SIRs by exact binomial and mid-p
==========================================
This is a conversion of SAS code written by Minn Soe and published by the CDC.
The original SAS code is available here: http://www.cdc.gov/nhsn/sas/binom.sas
Author: Max Masnick, PhD (https://masnick.org/contact)
Last modified: 2016-04-28
Installation
------------
The only dependency is `scipy`, which can be installed with `pip install scipy`.
Running
-------
This file is set up to run a number of tests to verify the accuracy of the code
translation. To do this, run:
python -m unittest compare_sirs_cdc
To use the `compare_sirs(...)` method in your own code, you'll want to extract
that method and the supporting `BinP_method()` and `BinP2_method()`.
Then simply call `compare_sirs(observed1, expected1, observed2, expected2)`.
This method will return a tuple as described in the comment inside the method
below.
Disclaimer
---------
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
from scipy.stats import binom
import unittest
def BinP_method(x1, x2, PP, N):
q = PP * 1.0 / (1 - PP)
k = 0
v = 1
s = 0
tot = 0
while (k<=N):
tot += v
if k>= x1 and k <= x2:
s += v
if tot > 10 ** 30:
s = s * 1.0 / (10**30)
tot = tot * 1.0 / (10**30)
v = v * 1.0 / (10**30)
k += 1
v = v * 1.0 * q * (N + 1 - k) / k
return s * 1.0 / tot
def BinP2_method(x1, x2, PP, N):
return BinP_method(x1, x2, PP, N) * 0.5
def compare_sirs(observed1, expected1, observed2, expected2):
"""
Based on http://www.cdc.gov/nhsn/sas/binom.sas
Returns: tuple with the following elements
1. Two-tailed P value
2. Ratio of SIRs
3. Lower 95% CI limit
4. Upper 95% CI limit
"""
# Can't run if both observed values are 0
if observed1 == 0 and observed2 == 0:
return (None, None, None, None)
# Can't run if expecteds are < 1
if expected1 < 1 or expected2 < 1:
return (None, None, None, None)
vN = observed1 + observed2
vP = observed2 * 1.0 / vN
ratio1 = observed1 * 1.0 / expected1
ratio2 = observed2 * 1.0 / expected2
O = observed1 + observed2
T = expected1 + expected2
# Take the larger of the two SIRs
if ratio1 >= ratio2:
# probbnml(0.5, 10, 4) = 0.37695 -> binom.cdf(4, 10, 0.5) = 0.37695
# p n x x n p
p = expected1 * 1.0 / T
n = O
x = observed1
else:
p = expected2 * 1.0 / T
n = O
x = observed2
# Testing for Ha: RR>1
p1 = 1 - binom.cdf(x, n, p)
p2 = 0.5 * (binom.cdf(x, n, p) - binom.cdf(x-1, n, p))
p3 = p1 + p2
midP = 2 * p3
if midP > 1:
midP = 1
if midP < 0:
midP = 0
if observed1 != 0: # prevents division by 0 when ratio1=0
ratio = (observed2 * 1.0 / expected2) / (observed1 * 1.0 / expected1)
if observed2 == 0:
dl = 0;
else:
p2 = vP * 1.0 / 2
vsL = 0
vsH = vP
p = 2.5/100
while (vsH - vsL) > (10**-5):
binP = BinP_method(x1=observed2 + 1, x2=vN, PP=p2, N=vN)
binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p2, N=vN)
if binP + binP2 > p:
vsH = p2
p2 = (vsL + p2) * 1.0 / 2
else:
vsL = p2
p2 = (vsH + p2) * 1.0 / 2
dl = p2
if observed2 == vN:
du = 1
else:
p3 = (1 + vP) * 1.0 / 2
vsL = vP
vsH = 1
p = 2.5/100
while (vsH - vsL) > (10**-5):
binP = BinP_method(x1=0, x2=observed2-1, PP=p3, N=vN)
binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p3, N=vN)
if binP + binP2 < p:
vsH = p3
p3 = (vsL + p3) * 1.0 / 2
else:
vsL = p3
p3 = (vsH + p3) * 1.0 / 2
du = p3
ll = round((dl * expected1 * 1.0)/((1-dl)*expected2), 3)
ul = round((du * expected1 * 1.0)/((1-du)*expected2), 3)
return(midP, ratio, ll, ul)
return(midP, None, None, None )
class TestSIRCompare(unittest.TestCase):
def setUp(self):
self.test_data = [
# {'o1': 0 , 'e1': 0.079 , 'o2': 1 , 'e2': 0.079 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': 0 , 'e1': 0.926 , 'o2': 1 , 'e2': 0.926 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': 0 , 'e1': 0.079 , 'o2': 2 , 'e2': 1 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': 0 , 'e1': 0.079 , 'o2': 1 , 'e2': 1.043 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': 1 , 'e1': 1.043 , 'o2': 0 , 'e2': 0.079 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': None , 'e1': 10 , 'o2': 10 , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': 10 , 'e1': None , 'o2': 10 , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': 10 , 'e1': 10 , 'o2': None , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
# {'o1': 10 , 'e1': 10 , 'o2': 10 , 'e2': None , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } ,
{'o1': 35 , 'e1': 9.048 , 'o2': 10 , 'e2': 9.048 , 'ratio': 0.28571 , 'ci_lower': 0.13477 , 'ci_upper': 0.5636 , 'macro_p': 0.00016 , } ,
{'o1': 2 , 'e1': 1.006 , 'o2': 1 , 'e2': 1.006 , 'ratio': 0.5 , 'ci_lower': 0.01695 , 'ci_upper': 6.5736 , 'macro_p': 0.625 , } ,
{'o1': 3 , 'e1': 4.649 , 'o2': 2 , 'e2': 4.649 , 'ratio': 0.66667 , 'ci_lower': 0.07928 , 'ci_upper': 4.4831 , 'macro_p': 0.6875 , } ,
{'o1': 5 , 'e1': 4.202 , 'o2': 4 , 'e2': 4.202 , 'ratio': 0.8 , 'ci_lower': 0.19114 , 'ci_upper': 3.1593 , 'macro_p': 0.75391 , } ,
{'o1': 20 , 'e1': 13.469 , 'o2': 16 , 'e2': 13.469 , 'ratio': 0.8 , 'ci_lower': 0.40762 , 'ci_upper': 1.5506 , 'macro_p': 0.51138 , } ,
{'o1': 9 , 'e1': 20.59 , 'o2': 8 , 'e2': 20.59 , 'ratio': 0.88889 , 'ci_lower': 0.32947 , 'ci_upper': 2.3631 , 'macro_p': 0.81453 , } ,
{'o1': 18 , 'e1': 11.262 , 'o2': 16 , 'e2': 11.262 , 'ratio': 0.88889 , 'ci_lower': 0.44658 , 'ci_upper': 1.757 , 'macro_p': 0.73588 , } ,
{'o1': 1 , 'e1': 3.168 , 'o2': 1 , 'e2': 3.168 , 'ratio': 1 , 'ci_lower': 0.02564 , 'ci_upper': 39.0037 , 'macro_p': 1 , } ,
{'o1': 1 , 'e1': 2.59 , 'o2': 2 , 'e2': 2.59 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } ,
{'o1': 1 , 'e1': 1.527 , 'o2': 2 , 'e2': 1.527 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } ,
{'o1': 1 , 'e1': 1.006 , 'o2': 2 , 'e2': 1.006 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } ,
{'o1': 6 , 'e1': 13.47 , 'o2': 7 , 'e2': 6.79 , 'ratio': 2.31443 , 'ci_lower': 0.74998 , 'ci_upper': 7.3312 , 'macro_p': 0.14197 , } ,
{'o1': 10 , 'e1': 9.048 , 'o2': 35 , 'e2': 9.048 , 'ratio': 3.5 , 'ci_lower': 1.77426 , 'ci_upper': 7.42 , 'macro_p': 0.00016 , }
]
def test_all(self):
for d in self.test_data:
# Data to test
t = [v for v in compare_sirs(d['o1'], d['e1'], d['o2'], d['e2'])]
# Reference data
a = [d['macro_p'], d['ratio'], d['ci_lower'], d['ci_upper']]
# Fix some rounding things
if t[0] is not None:
t[0] = round(t[0], 5)
if t[1] is not None:
t[1] = round(t[1], 5)
if a[2] is not None:
a[2] = round(a[2], 3)
if a[3] is not None:
a[3] = round(a[3], 3)
self.assertEqual(t, a)
if __name__ == '__main__':
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment