-
-
Save ahelium/e1287c26fec859ca5369 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.preprocessing import LabelBinarizer | |
from math import * | |
from decimal import Decimal | |
from sklearn import metrics | |
def create_dummy_cooc():
    """Build a tiny symmetric co-occurrence matrix for demonstration.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a 3x3 NumPy array of counts
        with a zero diagonal; ``labels`` is the pandas Index
        ``['A', 'B', 'C']`` naming both its rows and its columns.
    """
    names = ['A', 'B', 'C']
    counts = {'A': [0, 2, 1], 'B': [2, 0, 3], 'C': [1, 3, 0]}
    # Column order is passed explicitly so it never depends on dict ordering.
    frame = pd.DataFrame(counts, index=names, columns=names)
    labels = frame.index
    # ``DataFrame.as_matrix()`` was removed in pandas 1.0; ``.values`` returns
    # the same ndarray on both old and current pandas releases.
    data = frame.values
    return data, labels
def get_log_likelihood_df(data, labels):
    """Score every ordered pair of labels and return the results as a table.

    Args:
        data: Co-occurrence counts, forwarded unchanged to
            ``get_log_likelihood``.
        labels: Iterable of labels; each is paired with every label
            (including itself).

    Returns:
        A DataFrame produced by ``pd.pivot_table`` with ``Brief2`` as the
        index and ``Brief1`` as the columns, holding the ``LLR`` values.
        Pairs of identical labels are recorded as 0 without calling
        ``get_log_likelihood``.
    """
    items = []
    for brief1 in labels:
        for brief2 in labels:
            if brief1 == brief2:
                # A label is not scored against itself.
                llr = 0
            else:
                llr = get_log_likelihood(data, labels, brief1, brief2)
                # Single-argument form prints the same text under Python 2
                # and Python 3 (the bare ``print a, b`` statement is a
                # syntax error on Python 3).
                print("%s %s %s" % (brief1, brief2, llr))
            items.append(dict(Brief1=brief1, Brief2=brief2, LLR=llr))
    constructed = pd.DataFrame(items)
    # Each (Brief1, Brief2) pair occurs exactly once, so the aggregation only
    # reshapes. The 'sum' alias selects pandas' own sum and avoids the
    # FutureWarning recent pandas emits when handed the builtin ``sum``.
    return pd.pivot_table(
        data=constructed, columns='Brief1', index='Brief2', aggfunc='sum'
    )
def get_log_likelihood(data, labels, brief1, brief2):
    """Compute a log-likelihood score for ``brief1`` against ``brief2``.

    A 2x2 observed table is built from ``data``:

    * rows    -- "row labelled ``brief2``" vs. "every other row"
    * columns -- "column labelled ``brief1``" vs. "sum of all other columns"

    The expected table is the outer product of the fraction of rows in each
    row class and the column totals. The score is
    ``sum(observed * log(observed / expected))``, where cells with an
    observed count of zero contribute nothing (0 * log 0 is taken as 0).

    NOTE(review): the textbook G statistic carries a factor of 2 and derives
    expectations from row totals; this function does neither -- confirm that
    is intended.

    Args:
        data: 2-D array-like of counts whose rows and columns are both
            described by ``labels``.
        labels: 1-D array-like of labels (list, ndarray or pandas Index).
        brief1: Label selecting the column of interest.
        brief2: Label selecting the row of interest.

    Returns:
        The score as a NumPy float.
    """
    # Coerce so that element-wise comparison works for plain lists too.
    data = np.asarray(data)
    labels = np.asarray(labels)

    # Column 0: counts against brief1; column 1: counts against the rest.
    X = data[:, labels == brief1]
    X = np.append(
        X, np.sum(data[:, labels != brief1], axis=1, keepdims=True), axis=1
    )

    # Row-class indicator: column 0 marks rows that are brief2, column 1 marks
    # all other rows. Built directly from the mask so that a brief2 absent
    # from ``labels`` yields "no row is brief2" rather than the all-zero
    # output a label binarizer produces for single-class input.
    is_other = (labels != brief2).astype(int).reshape(-1, 1)
    y = np.append(1 - is_other, is_other, axis=1)

    x_table = np.dot(y.T, X)
    # Parenthesised single argument: same output on Python 2 and Python 3.
    print(x_table)

    feature_count = np.sum(X, axis=0, keepdims=True)
    class_prob = np.mean(y, axis=0, keepdims=True)
    expected_table = np.dot(class_prob.T, feature_count)

    # Skip empty cells: 0 * log(0) would evaluate to nan and poison the sum.
    filled = x_table != 0
    observed = x_table[filled]
    llr = np.sum(observed * np.log(observed / expected_table[filled]))
    return llr
def main():
    """Run the demo: build the dummy matrix, score all pairs, print the table.

    Returns:
        The DataFrame returned by ``get_log_likelihood_df`` for the dummy
        co-occurrence data.
    """
    data, labels = create_dummy_cooc()
    llr_df = get_log_likelihood_df(data, labels)
    # Parenthesised single argument: identical output on Python 2 and valid
    # syntax on Python 3, unlike the bare ``print llr_df`` statement.
    print(llr_df)  # yay
    return llr_df
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment