kylebgorman/log_odds.pyx

## log_odds.pyx
"""Log-odds computations."""


from libc.math cimport log, sqrt
from libc.stdint cimport int64_t


# Shorthand for the fixed-width 64-bit signed integer type cimported from
# libc.stdint; used for every count argument in this module.
ctypedef int64_t int64


cpdef double log_odds(int64 c, int64 n):
    """
    log_odds(c, n)

    Computes the log odds of an event from count data.

    The value is log(c) - log(n - c), i.e., the log of the ratio between the
    number of observations that are the event and the number that are not.

    Args:
      c: count of the event.
      n: count of all events in the sample.

    Returns:
      The log odds.
    """
    cdef double log_hits = log(c)
    cdef double log_misses = log(n - c)
    return log_hits - log_misses


cpdef double log_odds_ratio(int64 c1, int64 n1, int64 c2, int64 n2):
    """
    log_odds_ratio(c1, n1, c2, n2)

    Computes the log odds ratio of an event between two samples.

    Args:
      c1: count of the event in the first sample.
      n1: count of all events in the first sample.
      c2: count of the event in the second sample.
      n2: count of all events in the second sample.

    Returns:
      The log odds of the event in the first sample minus its log odds in
      the second sample.
    """
    cdef double first = log_odds(c1, n1)
    cdef double second = log_odds(c2, n2)
    return first - second


cpdef double log_odds_ratio_idp(int64 c1, int64 n1, int64 c2, int64 n2,
                                int64 c3, int64 n3):
    """
    log_odds_ratio_idp(c1, n1, c2, n2, c3, n3)

    Computes log odds ratio with an informative Dirichlet prior, as in:

        Monroe, B. L., Colaresi, M. P. and Quinn, K. M. 2009. Fightin' words:
        Lexical feature selection and evaluation for identifying the content of
        political conflict. Political Analysis 16: 372-403.

    The prior count c3 is added to each sample's event count and the prior
    total n3 to each sample's total. The difference between the logs of the
    two smoothed proportions is then divided by
    sqrt(1 / (c1 + c3) + 1 / (c2 + c3)).

    Args:
      c1: count of the event in the first sample.
      n1: count of all events in the first sample.
      c2: count of the event in the second sample.
      n2: count of all events in the second sample.
      c3: count of the event in the third (i.e., the prior) sample.
      n3: count of all events in the third (i.e., the prior) sample.

    Returns:
      Weighted log odds.
    """
    # Event counts smoothed by the prior count.
    cdef double smoothed1 = c1 + c3
    cdef double smoothed2 = c2 + c3
    # NOTE(review): these are logs of smoothed proportions (c / n), whereas
    # log_odds in this module uses c / (n - c); confirm that this is the
    # intended approximation of the cited estimator.
    cdef double log_share1 = log(smoothed1 / (n1 + n3))
    cdef double log_share2 = log(smoothed2 / (n2 + n3))
    cdef double variance = 1. / smoothed1 + 1. / smoothed2
    return (log_share1 - log_share2) / sqrt(variance)

## log_odds_test.py
"""Unit tests for the log_odds module."""

import unittest

import log_odds


class CountTest(unittest.TestCase):
    """Data for the word "I", extracted from:

    http://languagelog.ldc.upenn.edu/nll/?p=21068
    """

    @classmethod
    def setUpClass(cls):
        # DJT speeches.
        cls.c1 = 666
        cls.n1 = 14746
        # GWB's speeches.
        cls.c2 = 356
        cls.n2 = 14746
        # Some unknown background corpus.
        cls.c3 = 1022
        # This last number has to be inferred from the counts per million.
        cls.n3 = 29175

    def testLogOdds(self):
        self.assertAlmostEqual(-3.051221, log_odds.log_odds(self.c1, self.n1))

    def testLogOddsRatio(self):
        self.assertAlmostEqual(0.6481371,
            log_odds.log_odds_ratio(self.c1, self.n1, self.c2, self.n2))

    def testLogOddsRatioIDP(self):
        self.assertAlmostEqual(5.5889577,
            log_odds.log_odds_ratio_idp(self.c1, self.n1, self.c2, self.n2,
                                        self.c3, self.c3))


# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

## setup.py
from setuptools import setup
from Cython.Build import cythonize

# Cythonize the extension source up front so the setup() call below only
# lists package metadata and build options.
extensions = cythonize(["log_odds.pyx"])

setup(
    name="log_odds",
    version="0.2",
    author="Kyle Gorman",
    author_email="kylebgorman@gmail.com",
    install_requires=["Cython>=0.28.5"],
    ext_modules=extensions,
    test_suite="log_odds_test",
)
	"""Log-odds computations."""


	from libc.math cimport log, sqrt
	from libc.stdint cimport int64_t


	# Shorthand for the fixed-width 64-bit signed integer type cimported from
	# libc.stdint; used for every count argument in this module.
	ctypedef int64_t int64


	cpdef double log_odds(int64 c, int64 n):
	    """
	    log_odds(c, n)

	    Computes the log odds of an event from count data.

	    The value is log(c) - log(n - c), i.e., the log of the ratio between
	    the number of observations that are the event and the number that are
	    not.

	    Args:
	      c: count of the event.
	      n: count of all events in the sample.

	    Returns:
	      The log odds.
	    """
	    cdef double log_hits = log(c)
	    cdef double log_misses = log(n - c)
	    return log_hits - log_misses


	cpdef double log_odds_ratio(int64 c1, int64 n1, int64 c2, int64 n2):
	    """
	    log_odds_ratio(c1, n1, c2, n2)

	    Computes the log odds ratio of an event between two samples.

	    Args:
	      c1: count of the event in the first sample.
	      n1: count of all events in the first sample.
	      c2: count of the event in the second sample.
	      n2: count of all events in the second sample.

	    Returns:
	      The log odds of the event in the first sample minus its log odds in
	      the second sample.
	    """
	    cdef double first = log_odds(c1, n1)
	    cdef double second = log_odds(c2, n2)
	    return first - second


	cpdef double log_odds_ratio_idp(int64 c1, int64 n1, int64 c2, int64 n2,
	                                int64 c3, int64 n3):
	    """
	    log_odds_ratio_idp(c1, n1, c2, n2, c3, n3)

	    Computes log odds ratio with an informative Dirichlet prior, as in:

	        Monroe, B. L., Colaresi, M. P. and Quinn, K. M. 2009. Fightin'
	        words: Lexical feature selection and evaluation for identifying the
	        content of political conflict. Political Analysis 16: 372-403.

	    The prior count c3 is added to each sample's event count and the prior
	    total n3 to each sample's total. The difference between the logs of the
	    two smoothed proportions is then divided by
	    sqrt(1 / (c1 + c3) + 1 / (c2 + c3)).

	    Args:
	      c1: count of the event in the first sample.
	      n1: count of all events in the first sample.
	      c2: count of the event in the second sample.
	      n2: count of all events in the second sample.
	      c3: count of the event in the third (i.e., the prior) sample.
	      n3: count of all events in the third (i.e., the prior) sample.

	    Returns:
	      Weighted log odds.
	    """
	    # Event counts smoothed by the prior count.
	    cdef double smoothed1 = c1 + c3
	    cdef double smoothed2 = c2 + c3
	    # NOTE(review): these are logs of smoothed proportions (c / n), whereas
	    # log_odds in this module uses c / (n - c); confirm that this is the
	    # intended approximation of the cited estimator.
	    cdef double log_share1 = log(smoothed1 / (n1 + n3))
	    cdef double log_share2 = log(smoothed2 / (n2 + n3))
	    cdef double variance = 1. / smoothed1 + 1. / smoothed2
	    return (log_share1 - log_share2) / sqrt(variance)
	"""Unit tests for the log_odds module."""

	import unittest

	import log_odds


	class CountTest(unittest.TestCase):
	"""Data for the word "I", extracted from:

	http://languagelog.ldc.upenn.edu/nll/?p=21068
	"""

	@classmethod
	def setUpClass(cls):
	# DJT speeches.
	cls.c1 = 666
	cls.n1 = 14746
	# GWB's speeches.
	cls.c2 = 356
	cls.n2 = 14746
	# Some unknown background corpus.
	cls.c3 = 1022
	# This last number has to be inferred from the counts per million.
	cls.n3 = 29175

	def testLogOdds(self):
	self.assertAlmostEqual(-3.051221, log_odds.log_odds(self.c1, self.n1))

	def testLogOddsRatio(self):
	self.assertAlmostEqual(0.6481371,
	log_odds.log_odds_ratio(self.c1, self.n1, self.c2, self.n2))

	def testLogOddsRatioIDP(self):
	self.assertAlmostEqual(5.5889577,
	log_odds.log_odds_ratio_idp(self.c1, self.n1, self.c2, self.n2,
	self.c3, self.c3))


	# Run the test cases in this module when the file is executed as a script.
	if __name__ == "__main__":
	    unittest.main()
	from setuptools import setup
	from Cython.Build import cythonize

	# Cythonize the extension source up front so the setup() call below only
	# lists package metadata and build options.
	extensions = cythonize(["log_odds.pyx"])

	setup(
	    name="log_odds",
	    version="0.2",
	    author="Kyle Gorman",
	    author_email="kylebgorman@gmail.com",
	    install_requires=["Cython>=0.28.5"],
	    ext_modules=extensions,
	    test_suite="log_odds_test",
	)