# cwharland/confusion_to_array.py

## confusion_to_array.py
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix

# Six-class confusion matrix: rows are labelled 'Actual', columns 'Predicted'.
labels = ['N', 'L', 'R', 'A', 'P', 'V']
df = pd.DataFrame(
    data=[
        [1971, 19, 1, 8, 0, 1],
        [16, 1940, 2, 23, 9, 10],
        [8, 3, 181, 87, 0, 11],
        [2, 25, 159, 1786, 16, 12],
        [0, 24, 4, 8, 1958, 6],
        [11, 12, 29, 11, 11, 1926],
    ],
    index=pd.Index(labels, name='Actual'),
    columns=pd.Index(labels, name='Predicted'),
)

def create_arrays(df):
    """Expand a confusion matrix into per-sample label lists.

    ``df`` is a confusion-matrix DataFrame whose two axes are named
    'Actual' and 'Predicted' (they are looked up by those names after
    unstacking). Each cell holds how many samples had that
    (actual, predicted) pair; the counts are used as list repeat factors,
    so they are expected to be integers.

    Returns ``(y_true, y_predicted)``: two lists of equal length in which
    every (actual, predicted) pair appears as many times as its count.
    """
    # One row per matrix cell: the two axis labels plus the count in column 2.
    cells = df.unstack().reset_index()

    y_true = []
    y_predicted = []
    for actual_label, predicted_label, count in zip(
        cells['Actual'].values,
        cells['Predicted'].values,
        cells.iloc[:, 2].values,
    ):
        # Repeat each label once per sample counted in this cell.
        y_true.extend([actual_label] * count)
        y_predicted.extend([predicted_label] * count)

    return y_true, y_predicted

# Recreate the original confusion matrix and check for equality
y_t, y_p = create_arrays(df)

# Passing labels= fixes the row/column order to `labels` and keeps every class
# in the matrix even if it never occurs in y_t. Labelling the frame from
# np.unique(y_t) instead breaks as soon as a class is predicted but never
# actual, because confusion_matrix derives its default labels from both inputs.
conf_mat = confusion_matrix(y_t, y_p, labels=labels)

df_new = pd.DataFrame(conf_mat, columns=labels, index=labels)
df_new.index.name = 'Actual'
df_new.columns.name = 'Predicted'

# A bare `df == df_new` is evaluated and thrown away when this file runs as a
# script, so make the round-trip check actually fail on a mismatch.
assert (df == df_new).all().all(), 'rebuilt confusion matrix differs from df'


# And for the binary case: a 2x2 matrix with the same axis naming.
labels = ['False', 'True']
df = pd.DataFrame(
    data=[
        [5, 3],
        [2, 7],
    ],
    index=pd.Index(labels, name='Actual'),
    columns=pd.Index(labels, name='Predicted'),
)

# Recreate the original confusion matrix and check for equality
y_t, y_p = create_arrays(df)

# Passing labels= fixes the row/column order to `labels` and keeps both classes
# in the matrix even if one never occurs in y_t. Labelling the frame from
# np.unique(y_t) instead breaks as soon as a class is predicted but never
# actual, because confusion_matrix derives its default labels from both inputs.
conf_mat = confusion_matrix(y_t, y_p, labels=labels)

df_new = pd.DataFrame(conf_mat, columns=labels, index=labels)
df_new.index.name = 'Actual'
df_new.columns.name = 'Predicted'

# A bare `df == df_new` is evaluated and thrown away when this file runs as a
# script, so make the round-trip check actually fail on a mismatch.
assert (df == df_new).all().all(), 'rebuilt confusion matrix differs from df'
	import pandas as pd
	import numpy as np
	from sklearn.metrics import confusion_matrix

	# Six-class confusion matrix: rows are labelled 'Actual', columns 'Predicted'.
	labels = ['N', 'L', 'R', 'A', 'P', 'V']
	df = pd.DataFrame(
		data=[
			[1971, 19, 1, 8, 0, 1],
			[16, 1940, 2, 23, 9, 10],
			[8, 3, 181, 87, 0, 11],
			[2, 25, 159, 1786, 16, 12],
			[0, 24, 4, 8, 1958, 6],
			[11, 12, 29, 11, 11, 1926],
		],
		index=pd.Index(labels, name='Actual'),
		columns=pd.Index(labels, name='Predicted'),
	)

	def create_arrays(df):
		"""Expand a confusion matrix into per-sample label lists.

		``df`` is a confusion-matrix DataFrame whose two axes are named
		'Actual' and 'Predicted' (they are looked up by those names after
		unstacking). Each cell holds how many samples had that
		(actual, predicted) pair; the counts are used as list repeat
		factors, so they are expected to be integers.

		Returns ``(y_true, y_predicted)``: two lists of equal length in
		which every (actual, predicted) pair appears as many times as its
		count.
		"""
		# One row per matrix cell: the two axis labels plus the count in column 2.
		cells = df.unstack().reset_index()

		y_true = []
		y_predicted = []
		for actual_label, predicted_label, count in zip(
			cells['Actual'].values,
			cells['Predicted'].values,
			cells.iloc[:, 2].values,
		):
			# Repeat each label once per sample counted in this cell.
			y_true.extend([actual_label] * count)
			y_predicted.extend([predicted_label] * count)

		return y_true, y_predicted

	# Recreate the original confusion matrix and check for equality
	y_t, y_p = create_arrays(df)

	# Passing labels= fixes the row/column order to `labels` and keeps every
	# class in the matrix even if it never occurs in y_t. Labelling the frame
	# from np.unique(y_t) instead breaks as soon as a class is predicted but
	# never actual, because confusion_matrix derives its default labels from
	# both inputs.
	conf_mat = confusion_matrix(y_t, y_p, labels=labels)

	df_new = pd.DataFrame(conf_mat, columns=labels, index=labels)
	df_new.index.name = 'Actual'
	df_new.columns.name = 'Predicted'

	# A bare `df == df_new` is evaluated and thrown away when this file runs as
	# a script, so make the round-trip check actually fail on a mismatch.
	assert (df == df_new).all().all(), 'rebuilt confusion matrix differs from df'


	# And for the binary case: a 2x2 matrix with the same axis naming.
	labels = ['False', 'True']
	df = pd.DataFrame(
		data=[
			[5, 3],
			[2, 7],
		],
		index=pd.Index(labels, name='Actual'),
		columns=pd.Index(labels, name='Predicted'),
	)

	# Recreate the original confusion matrix and check for equality
	y_t, y_p = create_arrays(df)

	# Passing labels= fixes the row/column order to `labels` and keeps both
	# classes in the matrix even if one never occurs in y_t. Labelling the frame
	# from np.unique(y_t) instead breaks as soon as a class is predicted but
	# never actual, because confusion_matrix derives its default labels from
	# both inputs.
	conf_mat = confusion_matrix(y_t, y_p, labels=labels)

	df_new = pd.DataFrame(conf_mat, columns=labels, index=labels)
	df_new.index.name = 'Actual'
	df_new.columns.name = 'Predicted'

	# A bare `df == df_new` is evaluated and thrown away when this file runs as
	# a script, so make the round-trip check actually fail on a mismatch.
	assert (df == df_new).all().all(), 'rebuilt confusion matrix differs from df'