Last active
April 28, 2016 19:02
-
-
Save masnick/3dc523404e5eebb5332c4e4828f5c08f to your computer and use it in GitHub Desktop.
Compare 2 SIRs in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Compare 2 SIRs by exact binomial and mid-p | |
========================================== | |
This is a conversion of SAS code written by Minn Soe and published by the CDC. | |
The original SAS code is available here: http://www.cdc.gov/nhsn/sas/binom.sas | |
Author: Max Masnick, PhD (https://masnick.org/contact) | |
Last modified: 2016-04-28 | |
Installation | |
------------ | |
The only dependency is `scipy`, which can be installed with `pip install scipy`. | |
Running | |
------- | |
This file is set up to run a number of tests to verify the accuracy of the code | |
translation. To do this, run: | |
python -m unittest compare_sirs_cdc | |
To use the `compare_sirs(...)` method in your own code, you'll want to extract | |
that method and the supporting `BinP_method()` and `BinP2_method()`. | |
Then simply call `compare_sirs(observed1, expected1, observed2, expected2)`. | |
This method will return a tuple as described in the comment inside the method | |
below. | |
Disclaimer | |
--------- | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
THE SOFTWARE. | |
""" | |
from scipy.stats import binom | |
import unittest | |
def BinP_method(x1, x2, PP, N): | |
q = PP * 1.0 / (1 - PP) | |
k = 0 | |
v = 1 | |
s = 0 | |
tot = 0 | |
while (k<=N): | |
tot += v | |
if k>= x1 and k <= x2: | |
s += v | |
if tot > 10 ** 30: | |
s = s * 1.0 / (10**30) | |
tot = tot * 1.0 / (10**30) | |
v = v * 1.0 / (10**30) | |
k += 1 | |
v = v * 1.0 * q * (N + 1 - k) / k | |
return s * 1.0 / tot | |
def BinP2_method(x1, x2, PP, N): | |
return BinP_method(x1, x2, PP, N) * 0.5 | |
def compare_sirs(observed1, expected1, observed2, expected2): | |
""" | |
Based on http://www.cdc.gov/nhsn/sas/binom.sas | |
Returns: tuple with the following elements | |
1. Two-tailed P value | |
2. Ratio of SIRs | |
3. Lower 95% CI limit | |
4. Upper 95% CI limit | |
""" | |
# Can't run if both observed values are 0 | |
if observed1 == 0 and observed2 == 0: | |
return (None, None, None, None) | |
# Can't run if expecteds are < 1 | |
if expected1 < 1 or expected2 < 1: | |
return (None, None, None, None) | |
vN = observed1 + observed2 | |
vP = observed2 * 1.0 / vN | |
ratio1 = observed1 * 1.0 / expected1 | |
ratio2 = observed2 * 1.0 / expected2 | |
O = observed1 + observed2 | |
T = expected1 + expected2 | |
# Take the larger of the two SIRs | |
if ratio1 >= ratio2: | |
# probbnml(0.5, 10, 4) = 0.37695 -> binom.cdf(4, 10, 0.5) = 0.37695 | |
# p n x x n p | |
p = expected1 * 1.0 / T | |
n = O | |
x = observed1 | |
else: | |
p = expected2 * 1.0 / T | |
n = O | |
x = observed2 | |
# Testing for Ha: RR>1 | |
p1 = 1 - binom.cdf(x, n, p) | |
p2 = 0.5 * (binom.cdf(x, n, p) - binom.cdf(x-1, n, p)) | |
p3 = p1 + p2 | |
midP = 2 * p3 | |
if midP > 1: | |
midP = 1 | |
if midP < 0: | |
midP = 0 | |
if observed1 != 0: # prevents division by 0 when ratio1=0 | |
ratio = (observed2 * 1.0 / expected2) / (observed1 * 1.0 / expected1) | |
if observed2 == 0: | |
dl = 0; | |
else: | |
p2 = vP * 1.0 / 2 | |
vsL = 0 | |
vsH = vP | |
p = 2.5/100 | |
while (vsH - vsL) > (10**-5): | |
binP = BinP_method(x1=observed2 + 1, x2=vN, PP=p2, N=vN) | |
binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p2, N=vN) | |
if binP + binP2 > p: | |
vsH = p2 | |
p2 = (vsL + p2) * 1.0 / 2 | |
else: | |
vsL = p2 | |
p2 = (vsH + p2) * 1.0 / 2 | |
dl = p2 | |
if observed2 == vN: | |
du = 1 | |
else: | |
p3 = (1 + vP) * 1.0 / 2 | |
vsL = vP | |
vsH = 1 | |
p = 2.5/100 | |
while (vsH - vsL) > (10**-5): | |
binP = BinP_method(x1=0, x2=observed2-1, PP=p3, N=vN) | |
binP2 = BinP2_method(x1=observed2, x2=observed2, PP=p3, N=vN) | |
if binP + binP2 < p: | |
vsH = p3 | |
p3 = (vsL + p3) * 1.0 / 2 | |
else: | |
vsL = p3 | |
p3 = (vsH + p3) * 1.0 / 2 | |
du = p3 | |
ll = round((dl * expected1 * 1.0)/((1-dl)*expected2), 3) | |
ul = round((du * expected1 * 1.0)/((1-du)*expected2), 3) | |
return(midP, ratio, ll, ul) | |
return(midP, None, None, None ) | |
class TestSIRCompare(unittest.TestCase): | |
def setUp(self): | |
self.test_data = [ | |
# {'o1': 0 , 'e1': 0.079 , 'o2': 1 , 'e2': 0.079 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': 0 , 'e1': 0.926 , 'o2': 1 , 'e2': 0.926 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': 0 , 'e1': 0.079 , 'o2': 2 , 'e2': 1 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': 0 , 'e1': 0.079 , 'o2': 1 , 'e2': 1.043 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': 1 , 'e1': 1.043 , 'o2': 0 , 'e2': 0.079 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': None , 'e1': 10 , 'o2': 10 , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': 10 , 'e1': None , 'o2': 10 , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': 10 , 'e1': 10 , 'o2': None , 'e2': 10 , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
# {'o1': 10 , 'e1': 10 , 'o2': 10 , 'e2': None , 'ratio': None , 'ci_lower': None , 'ci_upper': None , 'macro_p': None , } , | |
{'o1': 35 , 'e1': 9.048 , 'o2': 10 , 'e2': 9.048 , 'ratio': 0.28571 , 'ci_lower': 0.13477 , 'ci_upper': 0.5636 , 'macro_p': 0.00016 , } , | |
{'o1': 2 , 'e1': 1.006 , 'o2': 1 , 'e2': 1.006 , 'ratio': 0.5 , 'ci_lower': 0.01695 , 'ci_upper': 6.5736 , 'macro_p': 0.625 , } , | |
{'o1': 3 , 'e1': 4.649 , 'o2': 2 , 'e2': 4.649 , 'ratio': 0.66667 , 'ci_lower': 0.07928 , 'ci_upper': 4.4831 , 'macro_p': 0.6875 , } , | |
{'o1': 5 , 'e1': 4.202 , 'o2': 4 , 'e2': 4.202 , 'ratio': 0.8 , 'ci_lower': 0.19114 , 'ci_upper': 3.1593 , 'macro_p': 0.75391 , } , | |
{'o1': 20 , 'e1': 13.469 , 'o2': 16 , 'e2': 13.469 , 'ratio': 0.8 , 'ci_lower': 0.40762 , 'ci_upper': 1.5506 , 'macro_p': 0.51138 , } , | |
{'o1': 9 , 'e1': 20.59 , 'o2': 8 , 'e2': 20.59 , 'ratio': 0.88889 , 'ci_lower': 0.32947 , 'ci_upper': 2.3631 , 'macro_p': 0.81453 , } , | |
{'o1': 18 , 'e1': 11.262 , 'o2': 16 , 'e2': 11.262 , 'ratio': 0.88889 , 'ci_lower': 0.44658 , 'ci_upper': 1.757 , 'macro_p': 0.73588 , } , | |
{'o1': 1 , 'e1': 3.168 , 'o2': 1 , 'e2': 3.168 , 'ratio': 1 , 'ci_lower': 0.02564 , 'ci_upper': 39.0037 , 'macro_p': 1 , } , | |
{'o1': 1 , 'e1': 2.59 , 'o2': 2 , 'e2': 2.59 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } , | |
{'o1': 1 , 'e1': 1.527 , 'o2': 2 , 'e2': 1.527 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } , | |
{'o1': 1 , 'e1': 1.006 , 'o2': 2 , 'e2': 1.006 , 'ratio': 2 , 'ci_lower': 0.15212 , 'ci_upper': 58.9872 , 'macro_p': 0.625 , } , | |
{'o1': 6 , 'e1': 13.47 , 'o2': 7 , 'e2': 6.79 , 'ratio': 2.31443 , 'ci_lower': 0.74998 , 'ci_upper': 7.3312 , 'macro_p': 0.14197 , } , | |
{'o1': 10 , 'e1': 9.048 , 'o2': 35 , 'e2': 9.048 , 'ratio': 3.5 , 'ci_lower': 1.77426 , 'ci_upper': 7.42 , 'macro_p': 0.00016 , } | |
] | |
def test_all(self): | |
for d in self.test_data: | |
# Data to test | |
t = [v for v in compare_sirs(d['o1'], d['e1'], d['o2'], d['e2'])] | |
# Reference data | |
a = [d['macro_p'], d['ratio'], d['ci_lower'], d['ci_upper']] | |
# Fix some rounding things | |
if t[0] is not None: | |
t[0] = round(t[0], 5) | |
if t[1] is not None: | |
t[1] = round(t[1], 5) | |
if a[2] is not None: | |
a[2] = round(a[2], 3) | |
if a[3] is not None: | |
a[3] = round(a[3], 3) | |
self.assertEqual(t, a) | |
if __name__ == '__main__': | |
unittest.main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment