Skip to content

Instantly share code, notes, and snippets.

@kylebgorman
Last active October 8, 2019 15:58
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylebgorman/faa1e8545b25d1a0d559 to your computer and use it in GitHub Desktop.
Save kylebgorman/faa1e8545b25d1a0d559 to your computer and use it in GitHub Desktop.
Log-likelihood ratio test in Cython, using scipy for distribution functions.
"""Functions for computing log-likelihood ratio statistics."""
from libc.math cimport log
from scipy.stats import binom
from scipy.stats import chi2
cpdef double LLR(int c_a, int c_b, int c_ab, int n) except *:
    """Computes log-likelihood ratio for two discrete events.

    This function computes the log-likelihood ratio for a 2x2 table: the log
    of the binomial likelihood of the table under the hypothesis that b is
    independent of a, minus the log of its likelihood when P(b | a) and
    P(b | not a) are estimated separately.

    The notation is that of:

    M. Mohri & B. Roark. 2006. Probabilistic context-free grammar
    induction based on structural zeros. In NAACL, pages 312-319.

    Args:
        c_a: Number of occurrences of a.
        c_b: Number of occurrences of b.
        c_ab: Number of co-occurrences of a and b; this must be less than or
            equal to c_a and c_b.
        n: Total number of occurrences.

    Returns:
        The log-likelihood ratio for this table. A margin with zero trials
        (c_a == 0 or c_a == n) contributes nothing to the statistic, and an
        empty table (n == 0) yields 0.

    Raises:
        ValueError: If the counts do not describe a valid 2x2 table.
    """
    # NOTE: `except *` lets Python exceptions raised below reach the caller;
    # a function returning a C double otherwise has no error return channel.
    cdef int c_abar, c_abar_b
    cdef double P_b, P_b_given_a, P_b_given_abar
    cdef double result = 0.
    # Explicit checks instead of `assert`, which can be disabled at build
    # time and would then let invalid tables through silently.
    if not (0 <= c_ab <= c_a <= n and c_ab <= c_b <= n):
        raise ValueError(
            "Expected 0 <= c_ab <= c_a <= n and c_ab <= c_b <= n, got "
            "c_a=%d, c_b=%d, c_ab=%d, n=%d" % (c_a, c_b, c_ab, n))
    c_abar = n - c_a
    c_abar_b = c_b - c_ab
    # b cannot occur without a more often than a is absent.
    if c_abar_b > c_abar:
        raise ValueError(
            "Expected c_b - c_ab <= n - c_a, got "
            "c_a=%d, c_b=%d, c_ab=%d, n=%d" % (c_a, c_b, c_ab, n))
    if n == 0:
        # Empty table: no evidence either way, and P(b) is undefined.
        return result
    # Work with log-pmfs directly; taking log() of a pmf underflows to -inf
    # once the probability drops below the smallest representable double.
    log_B = binom.logpmf
    # Cast to double (not float) so the estimates keep full precision.
    P_b = (<double> c_b) / n
    if c_a > 0:
        P_b_given_a = (<double> c_ab) / c_a
        # Numerator minus denominator for the trials where a occurred.
        result += log_B(c_ab, c_a, P_b)
        result -= log_B(c_ab, c_a, P_b_given_a)
    if c_abar > 0:
        P_b_given_abar = (<double> c_abar_b) / c_abar
        # Numerator minus denominator for the trials where a did not occur.
        result += log_B(c_abar_b, c_abar, P_b)
        result -= log_B(c_abar_b, c_abar, P_b_given_abar)
    return result
cpdef double p_LLR(double llr, int df=1):
    """Computes p-value for a log-likelihood ratio.

    This function computes the p-value for a log-likelihood ratio with the
    stated degrees of freedom. The log-likelihood statistic is known to be
    asymptotically chi-squared distributed, and thus this is used as the
    reference distribution.

    Args:
        llr: The log-likelihood ratio as computed by `LLR`.
        df: Degrees of freedom (usually 1).

    Returns:
        The p-value for the log-likelihood ratio test, i.e. the upper-tail
        probability of the chi-squared distribution at -2 * llr.
    """
    # Use the survival function rather than 1 - cdf: for strongly significant
    # statistics the cdf rounds to 1.0 and the subtraction would return
    # exactly 0 instead of the (tiny but nonzero) tail probability.
    return chi2.sf(-2. * llr, df)
from setuptools import setup
from Cython.Build import cythonize
# Build script: compiles ratio.pyx into an extension module via Cython.
setup(
    name="Likelihood Ratio calculation",
    version="0.1",
    author="Kyle Gorman",
    author_email="kylebgorman@gmail.com",
    # The compiled module imports scipy.stats when it is loaded, so SciPy is
    # a runtime requirement in addition to Cython.
    install_requires=["Cython >= 0.23", "scipy"],
    ext_modules=cythonize(["ratio.pyx"]),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment