Daniel Vila Suero dvsrepo

## data_sample.json
{"text":"A large marble was dropped into the bowl.","entities":[{"start":36,"end":40,"label":"OBJECT","text":"bowl"},{"start":8,"end":14,"label":"SUBJECT","text":"marble"}],"label":"Entity-Destination(e1,e2)"}
{"text":"A portion of the ethernet cable 's outer insulation is in the connector as well.","entities":[{"start":26,"end":31,"label":"OBJECT","text":"cable"},{"start":41,"end":51,"label":"SUBJECT","text":"insulation"}],"label":"Component-Whole(e2,e1)"}
{"text":"A soldier brings oranges he got out from a tank.","entities":[{"start":43,"end":47,"label":"OBJECT","text":"tank"},{"start":17,"end":24,"label":"SUBJECT","text":"oranges"}],"label":"Entity-Origin(e1,e2)"}
{"text":"A train ran into a truck, leaving 14 dead.","entities":[{"start":19,"end":24,"label":"OBJECT","text":"truck"},{"start":2,"end":7,"label":"SUBJECT","text":"train"}],"label":"Entity-Destination(e1,e2)"}
{"text":"The recipes are culled from various restaurant chefs, magazines and Brother Victor-Antoine d'Avila-Latourrette, a monk and cookbo

## example_0D_duration.ttl
@base <https://www.food.com/recipe/> .
@prefix ind: <http://purl.org/heals/ingredient/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@prefix wtm: <http://purl.org/heals/food/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

ind:Almond a wtm:Ingredient ;
    skos:definition "the nutlike kernel of the fruit of either of two trees, Prunus dulcis (sweet almond) or P. dulcis amara (bitter almond), which grow in warm temperate regions" ;
    skos:prefLabel "almond" .

## rubrix-example.py
from transformers import pipeline
from datasets import load_dataset
import rubrix as rb

# Build a zero-shot classification pipeline backed by the
# "typeform/squeezebert-mnli" checkpoint.
model = pipeline('zero-shot-classification', model="typeform/squeezebert-mnli")
# Load the test split of the "ag_news" dataset.
dataset = load_dataset("ag_news", split='test')
# Labels are: 'World', 'Sports', 'Business', 'Sci/Tech'
labels = dataset.features["label"].names

for example in dataset:

## rubrix_ner.py
# Log a single sentence with predicted entity spans to "ner_bands_dataset".
text = "I love the song Computer Love from Kraftwerk"

# Each prediction is a (label, start, end) triple; text[16:29] is
# "Computer Love" and text[35:44] is "Kraftwerk".
record = rb.TokenClassificationRecord(
    text=text,
    tokens=text.split(' '),  # split on single spaces
    prediction=[("SONG", 16, 29), ("BAND", 35, 44)],
    prediction_agent="my_ner_model_v1",
)
rb.log(record, name="ner_bands_dataset")

## huggingface_rubrix_example_load_train.py
from datasets import Dataset
import rubrix as rb

# Load the Rubrix dataset named 'unlabelled_dataset_zeroshot'.
# NOTE(review): presumably a pandas DataFrame, given the column access below — confirm.
df = rb.load('unlabelled_dataset_zeroshot')

# Inputs can be dicts to support multi-field classifiers; here we keep only
# each record's 'text' entry and store it in a new 'text' column.
df['text'] = df.inputs.transform(lambda r: r['text'])

# we flatten the annotations and create a dict for turning labels into numeric ids

## huggingface_rubrix_example.py
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import Trainer

# From here on, it's just regular fine-tuning with 🤗 transformers.
# The tokenizer and the sequence-classification model are both loaded from the
# "distilbert-base-uncased" checkpoint; the model is configured with 4 labels.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=4)

def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of ``examples`` with the module-level ``tokenizer``.

    The tokenizer is called with ``padding="max_length"`` and
    ``truncation=True``; its result is returned unchanged.
    """
    texts = examples["text"]
    return tokenizer(texts, padding="max_length", truncation=True)

## rubrix_interpret.py
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers_interpret import SequenceClassificationExplainer
from datasets import load_dataset

import rubrix as rb
from rubrix import TokenAttributions

# Load the test split of the Stanford Sentiment Treebank
# ("sst" dataset, "default" configuration).
dataset = load_dataset("sst", "default", split="test")

## rubrix-stanza.ipynb

      
              1 file
            
          
              2 forks
            
          
              0 comments
            
          
              9 stars
            
          
                dvsrepo
                / rubrix-stanza.ipynb
            
            
              Created
              December 19, 2021 21:59
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## rubrix_shap_example.py
import transformers
from datasets import load_dataset

from sklearn.preprocessing import MinMaxScaler

import shap

from rubrix import TextClassificationRecord, TokenAttributions

import rubrix as rb

## zephyr-7b-spin-iter1-v0-Nous.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                dvsrepo
                / zephyr-7b-spin-iter1-v0-Nous.md
            
            
              Created
              March 7, 2024 11:03
            
          
Model
AGIEval
GPT4All
TruthfulQA
Bigbench


zephyr-7b-spin-iter1-v0
Error: File does not exist
Error: File does not exist
Error: File does not exist
Error: File does not exist


AGIEval

Average: Error: File does not exist%
GPT4All
	{"text":"A large marble was dropped into the bowl.","entities":[{"start":36,"end":40,"label":"OBJECT","text":"bowl"},{"start":8,"end":14,"label":"SUBJECT","text":"marble"}],"label":"Entity-Destination(e1,e2)"}
	{"text":"A portion of the ethernet cable 's outer insulation is in the connector as well.","entities":[{"start":26,"end":31,"label":"OBJECT","text":"cable"},{"start":41,"end":51,"label":"SUBJECT","text":"insulation"}],"label":"Component-Whole(e2,e1)"}
	{"text":"A soldier brings oranges he got out from a tank.","entities":[{"start":43,"end":47,"label":"OBJECT","text":"tank"},{"start":17,"end":24,"label":"SUBJECT","text":"oranges"}],"label":"Entity-Origin(e1,e2)"}
	{"text":"A train ran into a truck, leaving 14 dead.","entities":[{"start":19,"end":24,"label":"OBJECT","text":"truck"},{"start":2,"end":7,"label":"SUBJECT","text":"train"}],"label":"Entity-Destination(e1,e2)"}
	{"text":"The recipes are culled from various restaurant chefs, magazines and Brother Victor-Antoine d'Avila-Latourrette, a monk and cookbo
	@base <https://www.food.com/recipe/> .
	@prefix ind: <http://purl.org/heals/ingredient/> .
	@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
	@prefix wtm: <http://purl.org/heals/food/> .
	@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

	ind:Almond a wtm:Ingredient ;
	skos:definition "the nutlike kernel of the fruit of either of two trees, Prunus dulcis (sweet almond) or P. dulcis amara (bitter almond), which grow in warm temperate regions" ;
	skos:prefLabel "almond" .
	from transformers import pipeline
	from datasets import load_dataset
	import rubrix as rb

	model = pipeline('zero-shot-classification', model="typeform/squeezebert-mnli")
	dataset = load_dataset("ag_news", split='test')
	# Labels are: 'World', 'Sports', 'Business', 'Sci/Tech'
	labels = dataset.features["label"].names

	for example in dataset:
	text = "I love the song Computer Love from Kraftwerk"

	record = rb.TokenClassificationRecord(
	text=text,
	tokens=[t for t in text.split(' ')],
	prediction=[("SONG", 16, 29), ("BAND", 35, 44)],
	prediction_agent="my_ner_model_v1"
	)
	rb.log(record, name="ner_bands_dataset")
	from datasets import Dataset
	import rubrix as rb

	# load rubrix dataset
	df = rb.load('unlabelled_dataset_zeroshot')

	# inputs can be dicts to support multifield classifiers, we just use the text here.
	df['text'] = df.inputs.transform(lambda r: r['text'])

	# we flatten the annotations and create a dict for turning labels into numeric ids
	from transformers import AutoModelForSequenceClassification
	from transformers import AutoTokenizer
	from transformers import Trainer

	# from here, it's just regular fine-tuning with 🤗 transformers
	tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
	model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=4)

	def tokenize_function(examples):
	return tokenizer(examples["text"], padding="max_length", truncation=True)
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	from transformers_interpret import SequenceClassificationExplainer
	from datasets import load_dataset

	import rubrix as rb
	from rubrix import TokenAttributions

	# Load Stanford sentiment treebank test set
	dataset = load_dataset("sst", "default", split="test")
	import transformers
	from datasets import load_dataset

	from sklearn.preprocessing import MinMaxScaler

	import shap

	from rubrix import TextClassificationRecord, TokenAttributions

	import rubrix as rb