Skip to content

Instantly share code, notes, and snippets.

View dvsrepo's full-sized avatar

Daniel Vila Suero dvsrepo

View GitHub Profile
Model AGIEval GPT4All TruthfulQA Bigbench
zephyr-7b-spin-iter1-v0 Error: File does not exist Error: File does not exist Error: File does not exist Error: File does not exist

AGIEval

Average: Error: File does not exist%

GPT4All

Model AGIEval GPT4All TruthfulQA Bigbench
zephyr-7b-spin-iter1-v0 Error: File does not exist Error: File does not exist Error: File does not exist Error: File does not exist

AGIEval

Average: Error: File does not exist%

GPT4All

Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Imports for model explanation (shap / transformers-interpret) and for logging
# explained predictions to Rubrix. Deduplicated: the original repeated
# `load_dataset`, `rubrix`, and `TokenAttributions` imports.
import shap
import transformers
from datasets import load_dataset
from sklearn.preprocessing import MinMaxScaler
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer

import rubrix as rb
from rubrix import TextClassificationRecord, TokenAttributions

# Load Stanford sentiment treebank test set
dataset = load_dataset("sst", "default", split="test")
from datasets import Dataset
import rubrix as rb

# Pull the dataset logged to Rubrix back into a pandas DataFrame.
df = rb.load('unlabelled_dataset_zeroshot')


def _extract_text(record_inputs):
    # `inputs` may hold several fields for multi-field classifiers;
    # this snippet only needs the plain text one.
    return record_inputs['text']


# Flatten the inputs column into a simple text column.
df['text'] = df.inputs.transform(_extract_text)
# we flatten the annotations and create a dict for turning labels into numeric ids
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer

# from here, it's just regular fine-tuning with 🤗 transformers
_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(_checkpoint, num_labels=4)
def tokenize_function(examples):
    """Tokenize a batch of examples with the module-level tokenizer.

    Pads every sequence to the model's max length and truncates longer ones,
    so all batches have a uniform shape for training.
    """
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
    )
text = "I love the song Computer Love from Kraftwerk"

# Log an NER prediction to Rubrix. The prediction tuples are
# (label, char_start, char_end): "Computer Love" spans [16, 29) and
# "Kraftwerk" spans [35, 44) in `text`.
record = rb.TokenClassificationRecord(
    text=text,
    # split(' ') already returns a list; the original wrapped it in a
    # redundant identity comprehension ([t for t in ...]).
    tokens=text.split(' '),
    prediction=[("SONG", 16, 29), ("BAND", 35, 44)],
    prediction_agent="my_ner_model_v1",
)
rb.log(record, name="ner_bands_dataset")
from transformers import pipeline
from datasets import load_dataset
import rubrix as rb

# AG News test split: the articles we want to classify.
dataset = load_dataset("ag_news", split='test')

# NLI model wrapped as a zero-shot classifier pipeline.
model = pipeline('zero-shot-classification', model="typeform/squeezebert-mnli")

# Labels are: 'World', 'Sports', 'Business', 'Sci/Tech'
labels = dataset.features["label"].names
for example in dataset:
# Base IRI: relative references resolve against food.com recipe pages.
@base <https://www.food.com/recipe/> .
# Namespace prefixes for ingredients, SKOS vocabulary, the HEALS food ontology, and XSD datatypes.
@prefix ind: <http://purl.org/heals/ingredient/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wtm: <http://purl.org/heals/food/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
# Almond: typed as a wtm:Ingredient, with a SKOS definition and preferred label.
ind:Almond a wtm:Ingredient ;
skos:definition "the nutlike kernel of the fruit of either of two trees, Prunus dulcis (sweet almond) or P. dulcis amara (bitter almond), which grow in warm temperate regions" ;
skos:prefLabel "almond" .