Skip to content

Instantly share code, notes, and snippets.

View dvsrepo's full-sized avatar

Daniel Vila Suero dvsrepo

View GitHub Profile
@dvsrepo
dvsrepo / data_sample.json
Last active September 25, 2020 10:06
NER + Relation classification multitask draft
{"text":"A large marble was dropped into the bowl.","entities":[{"start":36,"end":40,"label":"OBJECT","text":"bowl"},{"start":8,"end":14,"label":"SUBJECT","text":"marble"}],"label":"Entity-Destination(e1,e2)"}
{"text":"A portion of the ethernet cable 's outer insulation is in the connector as well.","entities":[{"start":26,"end":31,"label":"OBJECT","text":"cable"},{"start":41,"end":51,"label":"SUBJECT","text":"insulation"}],"label":"Component-Whole(e2,e1)"}
{"text":"A soldier brings oranges he got out from a tank.","entities":[{"start":43,"end":47,"label":"OBJECT","text":"tank"},{"start":17,"end":24,"label":"SUBJECT","text":"oranges"}],"label":"Entity-Origin(e1,e2)"}
{"text":"A train ran into a truck, leaving 14 dead.","entities":[{"start":19,"end":24,"label":"OBJECT","text":"truck"},{"start":2,"end":7,"label":"SUBJECT","text":"train"}],"label":"Entity-Destination(e1,e2)"}
{"text":"The recipes are culled from various restaurant chefs, magazines and Brother Victor-Antoine d'Avila-Latourrette, a monk and cookbo
@base <https://www.food.com/recipe/> .
@prefix ind: <http://purl.org/heals/ingredient/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wtm: <http://purl.org/heals/food/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
ind:Almond a wtm:Ingredient ;
skos:definition "the nutlike kernel of the fruit of either of two trees, Prunus dulcis (sweet almond) or P. dulcis amara (bitter almond), which grow in warm temperate regions" ;
skos:prefLabel "almond" .
from transformers import pipeline
from datasets import load_dataset
import rubrix as rb
# Zero-shot classification pipeline backed by the typeform/squeezebert-mnli checkpoint.
model = pipeline('zero-shot-classification', model="typeform/squeezebert-mnli")
# Only the test split of the ag_news dataset is loaded.
dataset = load_dataset("ag_news", split='test')
# Class names are read from the dataset's "label" feature.
# Labels are: 'World', 'Sports', 'Business', 'Sci/Tech'
labels = dataset.features["label"].names
for example in dataset:
# Build one token-classification record carrying a model prediction and log
# it to the "ner_bands_dataset" Rubrix dataset.
text = "I love the song Computer Love from Kraftwerk"
record = rb.TokenClassificationRecord(
    text=text,
    # str.split already returns a list of tokens; no comprehension is needed.
    tokens=text.split(' '),
    # Each prediction is (label, start, end); the offsets index characters of
    # `text` with an exclusive end: text[16:29] == "Computer Love" and
    # text[35:44] == "Kraftwerk".
    prediction=[("SONG", 16, 29), ("BAND", 35, 44)],
    prediction_agent="my_ner_model_v1"
)
rb.log(record, name="ner_bands_dataset")
from datasets import Dataset
import rubrix as rb
# load rubrix dataset
# NOTE(review): the column-style access below suggests rb.load returns a
# pandas DataFrame — confirm against the installed rubrix version.
df = rb.load('unlabelled_dataset_zeroshot')
# inputs can be dicts to support multifield classifiers; we only use the text field here.
# Each row's inputs['text'] value is copied into a new top-level 'text' column.
df['text'] = df.inputs.transform(lambda r: r['text'])
# we flatten the annotations and create a dict for turning labels into numeric ids
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import Trainer
# from here, it's just regular fine-tuning with 🤗 transformers
# Tokenizer and model are both loaded from the same "distilbert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
# num_labels=4 requests a four-class sequence classifier
# (presumably one class per label in the training data — confirm).
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=4)
def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of *examples* with the module-level tokenizer.

    The tokenizer is called with ``padding="max_length"`` and
    ``truncation=True``, and its result is returned unchanged.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer
from datasets import load_dataset
import rubrix as rb
from rubrix import TokenAttributions
# Load Stanford sentiment treebank test set
# ("sst" is the dataset name, "default" the configuration passed to load_dataset).
dataset = load_dataset("sst", "default", split="test")
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import transformers
from datasets import load_dataset
from sklearn.preprocessing import MinMaxScaler
import shap
from rubrix import TextClassificationRecord, TokenAttributions
import rubrix as rb
Model AGIEval GPT4All TruthfulQA Bigbench
zephyr-7b-spin-iter1-v0 Error: File does not exist Error: File does not exist Error: File does not exist Error: File does not exist

AGIEval

Average: Error: File does not exist%

GPT4All