Skip to content

Instantly share code, notes, and snippets.

@kylebgorman
Last active February 6, 2024 19:49
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kylebgorman/0cc8f42f870b8b1a07147ddf0fb44022 to your computer and use it in GitHub Desktop.
Save kylebgorman/0cc8f42f870b8b1a07147ddf0fb44022 to your computer and use it in GitHub Desktop.
Log-odds calculations
"""Log-odds computations."""
from libc.math cimport log, sqrt
from libc.stdint cimport int64_t
# Short alias for the fixed-width signed 64-bit integer type; the function
# signatures in this module use it for every count argument.
ctypedef int64_t int64
cpdef double log_odds(int64 c, int64 n):
    """
    log_odds(c, n)

    Computes the log odds of an event from count data.

    The result is log(c) - log(n - c), i.e., the log of the ratio between the
    number of observations that were the event and the number that were not.
    No validation is performed; the result is finite only when 0 < c < n.

    Args:
        c: count of the event.
        n: count of all events in the sample.

    Returns:
        The log odds.
    """
    # The integer counts are implicitly promoted to double by the C log().
    cdef double log_hits = log(c)
    cdef double log_misses = log(n - c)
    return log_hits - log_misses
cpdef double log_odds_ratio(int64 c1, int64 n1, int64 c2, int64 n2):
    """
    log_odds_ratio(c1, n1, c2, n2)

    Computes the log odds ratio of an event between two samples.

    This is the log odds of the event in the first sample minus the log odds
    of the event in the second sample.

    Args:
        c1: count of the event in the first sample.
        n1: count of all events for the first sample.
        c2: count of the event in the second sample.
        n2: count of all events for the second sample.

    Returns:
        The log odds ratio.
    """
    cdef double first_log_odds = log_odds(c1, n1)
    cdef double second_log_odds = log_odds(c2, n2)
    return first_log_odds - second_log_odds
cpdef double log_odds_ratio_idp(int64 c1, int64 n1, int64 c2, int64 n2,
                                int64 c3, int64 n3):
    """
    log_odds_ratio_idp(c1, n1, c2, n2, c3, n3)

    Computes log odds ratio with an informative Dirichlet prior, as in:

        Monroe, B. L., Colaresi, M. P. and Quinn, K. M. 2009. Fightin' words:
        Lexical feature selection and evaluation for identifying the content
        of political conflict. Political Analysis 16: 372-403.

    The counts from the third (prior) sample are added to the counts of each
    of the other two samples before they are compared, and the difference is
    divided by sqrt(1 / (c1 + c3) + 1 / (c2 + c3)).

    NOTE(review): the numerator below is built from smoothed proportions,
    (c + c3) / (n + n3), rather than from odds; confirm that this is the
    intended reading of the cited paper.

    Args:
        c1: count of the event in the first sample.
        n1: count of all events for the first sample.
        c2: count of the event in the second sample.
        n2: count of all events for the second sample.
        c3: count of the event in the third (i.e., the prior) sample.
        n3: count of all events for the third (i.e., the prior) sample.

    Returns:
        Weighted log odds.
    """
    # Event counts after adding the prior's pseudo-counts.
    cdef double smoothed1 = c1 + c3
    cdef double smoothed2 = c2 + c3
    # Log of each smoothed proportion; their difference is the raw statistic.
    cdef double log_share1 = log(smoothed1 / (n1 + n3))
    cdef double log_share2 = log(smoothed2 / (n2 + n3))
    cdef double delta = log_share1 - log_share2
    # Scale used to normalize the raw statistic.
    cdef double scale = sqrt(1. / smoothed1 + 1. / smoothed2)
    return delta / scale
"""Unit tests for the log_odds module."""
import unittest
import log_odds
class CountTest(unittest.TestCase):
    """Tests the log-odds functions on counts for the word "I".

    The data were extracted from:

        http://languagelog.ldc.upenn.edu/nll/?p=21068
    """

    @classmethod
    def setUpClass(cls):
        """Stores the event counts (c) and sample totals (n) shared by tests."""
        # DJT's speeches.
        cls.c1 = 666
        cls.n1 = 14746
        # GWB's speeches.
        cls.c2 = 356
        cls.n2 = 14746
        # Some unknown background corpus.
        cls.c3 = 1022
        # This last number has to be inferred from the counts per million.
        cls.n3 = 29175

    def testLogOdds(self):
        """Checks the log odds of the event in the first sample."""
        self.assertAlmostEqual(-3.051221, log_odds.log_odds(self.c1, self.n1))

    def testLogOddsRatio(self):
        """Checks the log odds ratio between the first and second samples."""
        self.assertAlmostEqual(
            0.6481371,
            log_odds.log_odds_ratio(self.c1, self.n1, self.c2, self.n2))

    def testLogOddsRatioIDP(self):
        """Checks the prior-weighted log odds ratio."""
        # The final argument is the prior sample's total (n3), not its event
        # count. Because n1 == n2 in this fixture, the prior total cancels in
        # the numerator, so the expected value does not depend on n3.
        self.assertAlmostEqual(
            5.5889577,
            log_odds.log_odds_ratio_idp(self.c1, self.n1, self.c2, self.n2,
                                        self.c3, self.n3))
# Run the tests in this file when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
from setuptools import setup
from Cython.Build import cythonize
# Package definition for the log_odds extension: log_odds.pyx is passed
# through cythonize() to produce the extension modules, Cython >= 0.28.5 is
# declared as an install requirement, and log_odds_test is named as the test
# suite.
setup(name="log_odds",
version="0.2",
author="Kyle Gorman",
author_email="kylebgorman@gmail.com",
install_requires=["Cython>=0.28.5"],
ext_modules=cythonize(["log_odds.pyx"]),
test_suite="log_odds_test",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment