Last active
October 8, 2019 15:58
-
-
Save kylebgorman/faa1e8545b25d1a0d559 to your computer and use it in GitHub Desktop.
Log-likelihood ratio test in Cython, using scipy for distribution functions.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Functions for computing log-likelihood ratio statistics.""" | |
from libc.math cimport log | |
from scipy.stats import binom | |
from scipy.stats import chi2 | |
cpdef double LLR(int c_a, int c_b, int c_ab, int n) except *:
    """Computes log-likelihood ratio for two discrete events.

    This function computes the log-likelihood ratio for a 2x2 table: the
    log-likelihood of the counts under the null hypothesis that b is
    independent of a (a single probability P(b)), minus the log-likelihood
    under the alternative that P(b | a) and P(b | not a) differ.

    The notation is that of:

    M. Mohri & B. Roark. 2006. Probabilistic context-free grammar
    induction based on structural zeros. In NAACL, pages 312-319.

    Args:
        c_a: Number of occurrences of a.
        c_b: Number of occurrences of b.
        c_ab: Number of co-occurrences of a and b; this must be less than or
            equal to c_a and c_b.
        n: Total number of occurrences.

    Returns:
        The log-likelihood ratio for this table.

    Raises:
        ValueError: If the counts do not describe a valid 2x2 table.
    """
    cdef int c_abar, c_abar_b
    cdef double P_b, P_b_given_a, P_b_given_abar, result
    # Explicit validation instead of `assert`, which can be compiled out.
    if not (0 <= c_ab <= c_a <= n and c_ab <= c_b <= n):
        raise ValueError(
            "Counts must satisfy 0 <= c_ab <= c_a <= n and c_ab <= c_b <= n")
    c_abar = n - c_a
    c_abar_b = c_b - c_ab
    # Occurrences of b without a cannot outnumber occurrences without a.
    if c_abar_b > c_abar:
        raise ValueError("Counts must satisfy c_b - c_ab <= n - c_a")
    # An empty table carries no evidence either way.
    if n == 0:
        return 0.
    # Work with log-PMFs directly; taking log(pmf) underflows to -inf for
    # large counts.
    log_B = binom.logpmf
    # Cast to double (not float) so no precision is lost before dividing.
    P_b = (<double> c_b) / n
    # With zero trials the binomial term is log(1) = 0 for any probability,
    # so fall back to P_b rather than dividing by zero.
    if c_a > 0:
        P_b_given_a = (<double> c_ab) / c_a
    else:
        P_b_given_a = P_b
    if c_abar > 0:
        P_b_given_abar = (<double> c_abar_b) / c_abar
    else:
        P_b_given_abar = P_b
    # Numerator.
    result = log_B(c_ab, c_a, P_b)
    result += log_B(c_abar_b, c_abar, P_b)
    # Denominator.
    result -= log_B(c_ab, c_a, P_b_given_a)
    result -= log_B(c_abar_b, c_abar, P_b_given_abar)
    return result
cpdef double p_LLR(double llr, int df=1) except? -1:
    """Computes p-value for a log-likelihood ratio.

    This function computes the p-value for a log-likelihood ratio with the
    stated degrees of freedom. The log-likelihood statistic is known to be
    asymptotically chi-squared distributed, and thus this is used as the
    reference distribution.

    Args:
        llr: The log-likelihood ratio as computed by `LLR`.
        df: Degrees of freedom (usually 1).

    Returns:
        The p-value for the log-likelihood ratio test.
    """
    # The test statistic is -2 * llr. The survival function is used rather
    # than 1 - cdf because the subtraction rounds small p-values to exactly 0.
    return chi2.sf(-2. * llr, df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from setuptools import setup | |
from Cython.Build import cythonize | |
# Build configuration for the Cython extension in ratio.pyx.
setup(
    name="Likelihood Ratio calculation",
    version="0.1",
    author="Kyle Gorman",
    author_email="kylebgorman@gmail.com",
    # Cython compiles the extension; scipy is imported by the compiled module
    # at runtime (scipy.stats.binom and scipy.stats.chi2), so it is required.
    install_requires=["Cython >= 0.23", "scipy"],
    ext_modules=cythonize(["ratio.pyx"]),
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment