#!/usr/bin/env python
"""Evaluate model predictions against targets.
Usage:
    evaluate_predictions.py --model_mixture_name=NAME --dataset_mixture_name=NAME --bucket_name=GOOGLE_CLOUD_BUCKET_NAME --eval_metric=METRIC_NAME [--model_size=SIZE] [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py --eval_path=NAME --eval_metric=METRIC_NAME [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py -h | --help
Options:
    -h --help                        Show this screen
    --model_mixture_name=NAME        Name of the model whose predictions are to be evaluated
    --dataset_mixture_name=NAME      Name of the dataset mixture to evaluate on
    --bucket_name=GOOGLE_CLOUD_BUCKET_NAME  Google Cloud bucket holding the predictions
    --eval_metric=METRIC_NAME        Metric to compute
    --eval_path=NAME                 Path to a predictions file to evaluate directly
    --model_size=SIZE                Size of the model
    --input_sequence_length=LEN      Input sequence length
    --output_sequence_length=LEN     Output sequence length
"""
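The usage block above is docopt-style. A minimal sketch of the matching entry point follows, assuming the docopt package; the original script may parse its arguments differently.

from docopt import docopt

if __name__ == "__main__":
    args = docopt(__doc__)  # parses sys.argv against the Usage: patterns above
    eval_metric = args["--eval_metric"]
    eval_path = args["--eval_path"]  # None when the bucket-based form is used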
import json
import os
import random

from tqdm import tqdm

# split.json is assumed to map each split name to the question ids it contains
with open("split.json") as f:
    split_ids = json.load(f)

all_questions = {}
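The preview ends before split_ids or all_questions is used. A purely hypothetical continuation, assuming split.json maps split names (e.g. "train", "dev", "test") to lists of question ids and that all_questions is later populated with id-to-question mappings:

# hypothetical: partition the collected questions by the ids in split.json
splits = {}
for name, ids in split_ids.items():
    id_set = set(ids)
    splits[name] = {qid: q for qid, q in all_questions.items() if qid in id_set}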
# This file extracts the predictions of several existing summarization systems on the XSUM dataset.
import json

from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

dataset = load_dataset('xsum')
total_len = len(dataset['test'])
batch_size = 16
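The preview cuts off before a model is loaded. A minimal sketch of how these pieces are typically wired together follows; the checkpoint name (sshleifer/distilbart-xsum-12-6, a public XSUM summarizer) and the output path are illustrative assumptions, not taken from the original file.

tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-xsum-12-6")
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-xsum-12-6")

predictions = []
for start in tqdm(range(0, total_len, batch_size)):
    batch = dataset['test'][start:start + batch_size]  # slicing yields a dict of columns
    inputs = tokenizer(batch['document'], truncation=True, padding=True, return_tensors='pt')
    summary_ids = model.generate(**inputs, max_length=60)
    predictions.extend(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))

with open('xsum_predictions.json', 'w') as f:
    json.dump(predictions, f)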
from torch.distributions import Categorical
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda', print_entropy=False):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    # reconstruction of the truncated body: instead of embedding one-hot token
    # ids, mix the embedding rows with softmax weights over the vocabulary
    probs = F.softmax(logits, dim=-1)
    if print_entropy:
        print(Categorical(probs=probs).entropy().mean())
    return torch.matmul(probs.to(device), embedding.weight)
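A hypothetical usage sketch for embed_inputs: feed the dense mixture to GPT-2 through inputs_embeds instead of discrete input_ids (assumes a CUDA device, matching the function's default).

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').to('cuda')

ids = tokenizer("Hello world", return_tensors='pt').input_ids.to('cuda')
# scaled one-hot logits make the softmax mixture collapse to ordinary embeddings,
# which is a handy sanity check for the dense path
one_hot = F.one_hot(ids, num_classes=model.config.vocab_size).float() * 1000.0
dense = embed_inputs(model.get_input_embeddings(), one_hot, device='cuda')
out = model(inputs_embeds=dense)
print(out.logits.shape)  # (1, seq_len, vocab_size)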
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    # typically we embed a one-hot vector, but since we work with dense
    # representations here, we mix the embedding rows with softmax weights
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)
# this works with transformers == 4.2.1
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F
from torch import nn


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)