Daniel Vila Suero (dvsrepo)
Benchmark results for zephyr-7b-spin-iter1-v0 (AGIEval, GPT4All, TruthfulQA, Bigbench): the result files could not be found ("Error: File does not exist"), so no scores or per-benchmark averages are shown.


import transformers
from datasets import load_dataset
from sklearn.preprocessing import MinMaxScaler
import shap
from rubrix import TextClassificationRecord, TokenAttributions
import rubrix as rb
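# The gist file above could not be displayed, so the following is only a
# hypothetical sketch of how these imports are typically combined: compute
# SHAP token attributions for a transformers sentiment pipeline, scale them,
# and log them to Rubrix. The model name, example texts, and dataset name
# below are assumptions, not values from the original gist.
classifier = transformers.pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    return_all_scores=True,
)
explainer = shap.Explainer(classifier)

texts = ["I love this movie", "The plot was a complete mess"]
shap_values = explainer(texts)

records = []
for i, text in enumerate(texts):
    # scale each token's per-class attribution scores into [0, 1]
    scaled = MinMaxScaler().fit_transform(shap_values.values[i])
    token_attributions = [
        TokenAttributions(
            token=token,
            attributions=dict(zip(shap_values.output_names, scores)),
        )
        for token, scores in zip(shap_values.data[i], scaled)
    ]
    prediction = [(p["label"], p["score"]) for p in classifier(text)[0]]
    records.append(
        TextClassificationRecord(
            inputs={"text": text},
            prediction=prediction,
            explanation={"text": token_attributions},
            prediction_agent="distilbert-sst2-with-shap",
        )
    )
rb.log(records, name="shap_token_attributions_example")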
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer
from datasets import load_dataset
import rubrix as rb
from rubrix import TokenAttributions
# Load Stanford sentiment treebank test set
dataset = load_dataset("sst", "default", split="test")
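# Hypothetical continuation (not part of the original snippet): explain a few
# test sentences with transformers-interpret and log the word attributions to
# Rubrix. The model name, the number of examples, and the dataset name passed
# to rb.log are assumptions.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
explainer = SequenceClassificationExplainer(model, tokenizer)

records = []
for example in dataset.select(range(10)):
    text = example["sentence"]
    # list of (word, attribution) tuples for the predicted class
    word_attributions = explainer(text)
    predicted_label = explainer.predicted_class_name
    records.append(
        rb.TextClassificationRecord(
            inputs={"text": text},
            # placeholder score; only the predicted label matters here
            prediction=[(predicted_label, 1.0)],
            explanation={
                "text": [
                    TokenAttributions(
                        token=word,
                        attributions={predicted_label: score},
                    )
                    for word, score in word_attributions
                ]
            },
            prediction_agent="transformers-interpret",
        )
    )
rb.log(records, name="sst_word_attributions_example")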
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import Trainer
# from here, it's just regular fine-tuning with 🤗 transformers
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=4)
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)
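# Hypothetical continuation (not in the original snippet): assuming a 🤗
# datasets.Dataset with "text" and "label" columns, tokenize it with the
# function above and fine-tune with the Trainer API. The toy data, output
# directory, and training arguments are assumptions.
from datasets import Dataset
from transformers import TrainingArguments

train_ds = Dataset.from_dict(
    {
        "text": ["the service was great", "terrible experience overall"],
        "label": [0, 1],
    }
)
tokenized_ds = train_ds.map(tokenize_function, batched=True)

training_args = TrainingArguments(output_dir="distilbert-finetuned", num_train_epochs=1)
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_ds)
trainer.train()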
from datasets import Dataset
import rubrix as rb
# load rubrix dataset
df = rb.load('unlabelled_dataset_zeroshot')
# inputs can be dicts to support multifield classifiers, we just use the text here.
df['text'] = df.inputs.transform(lambda r: r['text'])
# we flatten the annotations and create a dict for turning labels into numeric ids
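# Hypothetical continuation (not in the original snippet): the flattening and
# label-to-id mapping that the comment above refers to. Assumes each record
# carries a single string annotation.
df = df.dropna(subset=["annotation"])
label2id = {label: i for i, label in enumerate(sorted(df.annotation.unique()))}
df["label"] = df.annotation.map(label2id)
# build a 🤗 dataset ready for tokenization and fine-tuning
train_ds = Dataset.from_pandas(df[["text", "label"]])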
text = "I love the song Computer Love from Kraftwerk"
record = rb.TokenClassificationRecord(
    text=text,
    tokens=text.split(' '),
    prediction=[("SONG", 16, 29), ("BAND", 35, 44)],
    prediction_agent="my_ner_model_v1",
)
rb.log(record, name="ner_bands_dataset")
from transformers import pipeline
from datasets import load_dataset
import rubrix as rb
model = pipeline('zero-shot-classification', model="typeform/squeezebert-mnli")
dataset = load_dataset("ag_news", split='test')
# Labels are: 'World', 'Sports', 'Business', 'Sci/Tech'
labels = dataset.features["label"].names
for example in dataset:
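    # Hypothetical loop body (missing from the snippet): run zero-shot
    # classification over the candidate labels and log each prediction to
    # Rubrix; the dataset name passed to rb.log is an assumption.
    prediction = model(example["text"], candidate_labels=labels)
    record = rb.TextClassificationRecord(
        inputs={"text": example["text"]},
        prediction=list(zip(prediction["labels"], prediction["scores"])),
        annotation=labels[example["label"]],
        prediction_agent="typeform/squeezebert-mnli",
    )
    rb.log(record, name="ag_news_zeroshot")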
@base <https://www.food.com/recipe/> .
@prefix ind: <http://purl.org/heals/ingredient/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wtm: <http://purl.org/heals/food/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
ind:Almond a wtm:Ingredient ;
    skos:definition "the nutlike kernel of the fruit of either of two trees, Prunus dulcis (sweet almond) or P. dulcis amara (bitter almond), which grow in warm temperate regions" ;
    skos:prefLabel "almond" .