Skip to content

Instantly share code, notes, and snippets.

View dvsrepo's full-sized avatar

Daniel Vila Suero dvsrepo

View GitHub Profile
@dvsrepo
dvsrepo / data_sample.json
Last active September 25, 2020 10:06
NER + Relation classification multitask draft
{"text":"A large marble was dropped into the bowl.","entities":[{"start":36,"end":40,"label":"OBJECT","text":"bowl"},{"start":8,"end":14,"label":"SUBJECT","text":"marble"}],"label":"Entity-Destination(e1,e2)"}
{"text":"A portion of the ethernet cable 's outer insulation is in the connector as well.","entities":[{"start":26,"end":31,"label":"OBJECT","text":"cable"},{"start":41,"end":51,"label":"SUBJECT","text":"insulation"}],"label":"Component-Whole(e2,e1)"}
{"text":"A soldier brings oranges he got out from a tank.","entities":[{"start":43,"end":47,"label":"OBJECT","text":"tank"},{"start":17,"end":24,"label":"SUBJECT","text":"oranges"}],"label":"Entity-Origin(e1,e2)"}
{"text":"A train ran into a truck, leaving 14 dead.","entities":[{"start":19,"end":24,"label":"OBJECT","text":"truck"},{"start":2,"end":7,"label":"SUBJECT","text":"train"}],"label":"Entity-Destination(e1,e2)"}
{"text":"The recipes are culled from various restaurant chefs, magazines and Brother Victor-Antoine d'Avila-Latourrette, a monk and cookbo
@base <https://www.food.com/recipe/> .
@prefix ind: <http://purl.org/heals/ingredient/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wtm: <http://purl.org/heals/food/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
# Almond: declared as a wtm:Ingredient, with a SKOS definition and preferred label.
ind:Almond a wtm:Ingredient ;
skos:definition "the nutlike kernel of the fruit of either of two trees, Prunus dulcis (sweet almond) or P. dulcis amara (bitter almond), which grow in warm temperate regions" ;
skos:prefLabel "almond" .
from transformers import pipeline
from datasets import load_dataset
import rubrix as rb
model = pipeline('zero-shot-classification', model="typeform/squeezebert-mnli")
dataset = load_dataset("ag_news", split='test')
# Labels are: 'World', 'Sports', 'Business', 'Sci/Tech'
labels = dataset.features["label"].names
for example in dataset:
# Build one hand-written NER prediction and log it to the
# "ner_bands_dataset" dataset through the Rubrix client.
text = "I love the song Computer Love from Kraftwerk"

record = rb.TokenClassificationRecord(
    text=text,
    # str.split already returns a new list, so no wrapping comprehension.
    tokens=text.split(' '),
    # Each tuple is (label, start, end); the numbers line up with character
    # offsets into `text`: text[16:29] == "Computer Love" and
    # text[35:44] == "Kraftwerk".
    prediction=[("SONG", 16, 29), ("BAND", 35, 44)],
    prediction_agent="my_ner_model_v1",
)
rb.log(record, name="ner_bands_dataset")
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import Trainer
# from here, it's just regular fine-tuning with 🤗 transformers
# Load the tokenizer and the sequence-classification model from the same
# "distilbert-base-uncased" checkpoint name.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
# NOTE(review): num_labels=4 is presumably the number of target classes in the
# training data — confirm against the dataset actually used for fine-tuning.
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=4)
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples``.

    Calls the module-level ``tokenizer`` on ``examples["text"]`` with
    ``padding="max_length"`` and ``truncation=True`` and returns whatever
    the tokenizer returns.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)
from datasets import Dataset
import rubrix as rb
# load rubrix dataset
df = rb.load('unlabelled_dataset_zeroshot')
# inputs can be dicts to support multifield classifiers, we just use the text here.
df['text'] = df.inputs.transform(lambda r: r['text'])
# we flatten the annotations and create a dict for turning labels into numeric ids
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer
from datasets import load_dataset
import rubrix as rb
from rubrix import TokenAttributions
# Load Stanford sentiment treebank test set
dataset = load_dataset("sst", "default", split="test")
import transformers
from datasets import load_dataset
from sklearn.preprocessing import MinMaxScaler
import shap
from rubrix import TextClassificationRecord, TokenAttributions
import rubrix as rb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Model AGIEval GPT4All TruthfulQA Bigbench
zephyr-7b-spin-iter1-v0 Error: File does not exist Error: File does not exist Error: File does not exist Error: File does not exist

AGIEval

Average: Error: File does not exist%

GPT4All