Skip to content

Instantly share code, notes, and snippets.

@ahelium
Created July 16, 2015 15:39
Show Gist options
  • Save ahelium/e1287c26fec859ca5369 to your computer and use it in GitHub Desktop.
Save ahelium/e1287c26fec859ca5369 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from math import *
from decimal import Decimal
from sklearn import metrics
def create_dummy_cooc():
    """Build a small, symmetric co-occurrence matrix for demonstration.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is a 3x3 NumPy array of
        co-occurrence counts and ``labels`` is the pandas Index
        ``['A', 'B', 'C']`` naming both its rows and its columns.
    """
    names = ['A', 'B', 'C']
    d = {'A': [0, 2, 1], 'B': [2, 0, 3], 'C': [1, 3, 0]}
    # Pin the column order explicitly so it never depends on dict ordering.
    frame = pd.DataFrame(d, index=names, columns=names)
    labels = frame.index
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in both old and new pandas releases.
    data = frame.values
    return data, labels
def get_log_likelihood_df(data, labels):
    """Compute the pairwise log-likelihood-ratio table for all labels.

    Args:
        data: 2-D co-occurrence count array, passed through unchanged to
            ``get_log_likelihood``.
        labels: iterable of labels; every ordered pair of labels is scored.

    Returns:
        pandas.DataFrame: pivot table indexed by ``Brief2`` with ``Brief1``
        as columns and the LLR scores as values. Because no ``values``
        argument is given to ``pivot_table``, the columns are expected to
        carry an outer ``'LLR'`` level. Diagonal entries
        (``brief1 == brief2``) are fixed at 0.
    """
    items = []
    for brief1 in labels:
        for brief2 in labels:
            if brief1 == brief2:
                # A label is not scored against itself.
                llr = 0
            else:
                llr = get_log_likelihood(data, labels, brief1, brief2)
                # Progress output; formatted so it prints identically on
                # Python 2 and Python 3.
                print("%s %s %s" % (brief1, brief2, llr))
            items.append(dict(Brief1=brief1, Brief2=brief2, LLR=llr))
    constructed = pd.DataFrame(items)
    # The string 'sum' selects the pandas reduction directly; passing the
    # builtin ``sum`` is deprecated in recent pandas versions.
    return pd.pivot_table(data=constructed, columns='Brief1', index='Brief2',
                          aggfunc='sum')
def get_log_likelihood(data, labels, brief1, brief2):
    """Score the association between ``brief1`` and ``brief2``.

    A 2x2 contingency table is built from the co-occurrence matrix: columns
    are split into "brief1" vs. "everything else", rows into "brief2" vs.
    "every other row". The score is ``sum(O * log(O / E))`` over the table,
    where ``E`` is the outer product of the row-class proportions and the
    column totals.

    Args:
        data: 2-D array of counts whose rows and columns both follow
            ``labels``.
        labels: 1-D sequence of labels.
        brief1: label selecting the column of interest.
        brief2: label selecting the row of interest.

    Returns:
        float: the log-likelihood-ratio style score. Cells with an observed
        count of zero contribute 0 (the limit of ``x * log(x)`` as x -> 0)
        instead of turning the whole result into NaN.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)

    # Columns: [brief1, sum of all other columns].
    is_brief1 = labels == brief1
    rest = np.sum(data[:, ~is_brief1], axis=1, keepdims=True)
    X = np.append(data[:, is_brief1], rest, axis=1)

    # Row indicator: first column marks the brief2 row(s), second the rest.
    not_brief2 = (labels != brief2).astype(int).reshape(-1, 1)
    y = np.append(1 - not_brief2, not_brief2, axis=1)

    # Observed contingency table (row class x column group).
    x_table = np.dot(y.T, X)
    print(x_table)

    feature_count = np.sum(X, axis=0, keepdims=True)
    class_prob = np.mean(y, axis=0, keepdims=True)
    expected_table = np.dot(class_prob.T, feature_count)

    # Only positive cells enter the sum: 0 * log(0) would evaluate to NaN.
    observed = x_table.astype(float)
    positive = observed > 0
    terms = np.zeros_like(observed)
    terms[positive] = observed[positive] * np.log(
        observed[positive] / expected_table[positive])
    llr = np.sum(terms)
    return llr
def main():
    """Run the demo: build the dummy matrix, score all pairs, print the table.

    Returns:
        pandas.DataFrame: the pairwise LLR table produced by
        ``get_log_likelihood_df``.
    """
    data, labels = create_dummy_cooc()
    llr_df = get_log_likelihood_df(data, labels)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(llr_df)
    return llr_df


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment