Skip to content

Instantly share code, notes, and snippets.

@rjurney rjurney/snorkel.py
Last active Feb 13, 2020

Embed
What would you like to do?
Utilities for making Snorkel display all LabelingFunction errors
import pandas as pd

from snorkel.analysis import get_label_buckets
# Integer label scheme used by the labeling functions.
ABSTAIN = -1
GENERAL = 0
API = 1
EDUCATION = 2
DATASET = 3

# Index-aligned with the constants above (names[GENERAL] == 'GENERAL', ...).
# NOTE(review): ABSTAIN (-1) has no entry, so names[-1] would silently alias
# 'DATASET' — confirm abstains never reach a names[...] lookup.
names = 'GENERAL API EDUCATION DATASET'.split()

# Keep only the columns useful for eyeballing mispredictions.
viz_columns = ['full_name', 'description', 'readme']
df_viz = df_test[viz_columns]

# Lift pandas' row-display cap so every record prints when debugging.
pd.set_option('display.max_rows', df_viz.shape[0])
def get_mistakes(df, labels, names, buckets=None, probs_test=None):
    """Return the rows of *df* falling in one (actual, predicted) error bucket.

    Parameters:
        df: DataFrame of test records, positionally aligned with the label
            buckets (rows are selected with .iloc).
        labels: (actual, predicted) integer label pair keying into *buckets*.
        names: (actual_name, predicted_name) display strings for the pair.
        buckets: mapping from (actual, predicted) pairs to integer row indices,
            as produced by snorkel.analysis.get_label_buckets. Optional for
            backward compatibility; falls back to a module-level global.
        probs_test: 2-D array of per-row scores; column 1 is recorded as the
            'probability' column. Optional; falls back to a module-level global.

    Returns:
        A new DataFrame (copy) of the bucketed rows with 'probability',
        'true label', and 'predicted label' columns added.

    Raises:
        KeyError: if a fallback is needed but the module global is absent
            (the original raised NameError in the same situation).
    """
    # Fix: the original read `buckets` / `probs_test` as free variables, but
    # they are locals of mistakes_df, so every call raised NameError. Accept
    # them as explicit parameters; fall back to module globals when omitted.
    if buckets is None:
        buckets = globals()['buckets']
    if probs_test is None:
        probs_test = globals()['probs_test']
    rows = buckets[labels]
    # .copy() so the added columns land on a fresh frame, not a view of *df*
    # (avoids pandas SettingWithCopyWarning / silent no-op writes).
    df_fn = df.iloc[rows].copy()
    df_fn['probability'] = probs_test[rows, 1]
    df_fn['true label'] = names[0]
    df_fn['predicted label'] = names[1]
    return df_fn
def mistakes_df(df, label_model, L_test, y_test, names):
    """Build one DataFrame listing every misclassified test record.

    Parameters:
        df: DataFrame of test records, positionally aligned with y_test
            (rows are selected with .iloc).
        label_model: fitted model exposing predict_proba(L=...) -> 2-D array.
        L_test: label matrix passed through to label_model.predict_proba.
        y_test: ground-truth label vector.
        names: sequence mapping integer labels to display names.

    Returns:
        A DataFrame of all mistaken rows with 'probability', 'true label',
        and 'predicted label' columns added; an empty frame if the model
        made no mistakes.
    """
    probs_test = label_model.predict_proba(L=L_test)
    # Threshold into hard boolean predictions; column 1 is the positive vote.
    preds = probs_test >= 0.5
    buckets = get_label_buckets(y_test, preds[:, 1])

    mistake_frames = []
    for actual, predicted in buckets.keys():
        if actual == predicted:
            continue
        rows = buckets[(actual, predicted)]
        # Fix: the original delegated to get_mistakes, which read `buckets`
        # and `probs_test` as globals and raised NameError — build the
        # per-bucket frame inline instead. .copy() keeps the new columns off
        # a view of *df* (SettingWithCopyWarning).
        frame = df.iloc[rows].copy()
        frame['probability'] = preds[rows, 1]
        frame['true label'] = names[actual]
        frame['predicted label'] = names[predicted]
        mistake_frames.append(frame)

    # Fix: the original indexed out_dfs[0] and crashed with IndexError when
    # the model made no mistakes; return an empty frame instead.
    if not mistake_frames:
        return df.iloc[0:0].copy()

    # pd.concat replaces DataFrame.append (deprecated, removed in pandas 2.0).
    return pd.concat(mistake_frames)
# Display every mistake the majority-vote model makes on the test set.
# NOTE(review): relies on globals defined earlier in the file (df_viz,
# majority_model, L_test, y_test, names); .head(202) presumably bounds the
# display to the expected mistake count — confirm against the dataset size.
mistakes_df(df_viz, majority_model, L_test, y_test, names).head(202)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.