danyaljj / evaluate_predictions.py
#!/usr/bin/env python
"""Evaluate model predictions against targets.

Usage:
    evaluate_predictions.py --model_mixture_name=NAME --dataset_mixture_name=NAME --bucket_name=GOOGLE_CLOUD_BUCKET_NAME --eval_metric=METRIC_NAME [--model_size=SIZE] [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py --eval_path=NAME --eval_metric=METRIC_NAME [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py -h | --help

Options:
    -h --help                   Show this screen
    --model_mixture_name=NAME   Name of the model whose predictions are to be evaluated
"""
import json
import os
import random

from tqdm import tqdm

# load the precomputed split ids
with open("split.json") as f:
    split_ids = json.load(f)

all_questions = {}
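The preview ends before any argument handling. The Usage/Options docstring above follows the docopt convention, so a minimal parsing sketch might look like the following (the use of the docopt package is an assumption; the gist does not show it):

from docopt import docopt  # assumption: the docstring is parsed with docopt

if __name__ == "__main__":
    args = docopt(__doc__)  # validates argv against the Usage patterns above
    # options arrive keyed by flag name, e.g. args["--eval_metric"]
    eval_metric = args["--eval_metric"]
    model_size = args["--model_size"]  # None unless the optional flag was passed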
# this file extracts the predictions of several existing summarization systems for the XSUM dataset
import json
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
dataset = load_dataset('xsum')
total_len = len(dataset['test'])
batch_size = 16
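The preview stops before the extraction loop; a minimal sketch of how the imports above are typically wired together follows (the checkpoint name and generation settings are assumptions, not taken from the gist):

# assumption: a BART checkpoint fine-tuned on XSUM; the gist may use different systems
model_name = "facebook/bart-large-xsum"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

predictions = []
for start in tqdm(range(0, total_len, batch_size)):
    # slicing a datasets split returns a dict of columns; 'document' holds the articles
    batch = dataset['test'][start:start + batch_size]['document']
    inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
    summary_ids = model.generate(**inputs, max_length=60, num_beams=4)
    predictions.extend(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))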
danyaljj / generation_quality_vs_prompt_peaky-ness.py
Created June 29, 2021 02:40
Prompting GPT-2 with "soft" prompts
from torch.distributions import Categorical
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F
def embed_inputs(embedding, logits, device='cuda', print_entropy=False):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
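The listing truncates the function at its docstring. A plausible completion, assuming (as a later preview's probs = F.softmax(logits, dim=-1) line suggests) that the dense inputs are a softmax mixture over the vocabulary multiplied into the embedding matrix:

def embed_inputs(embedding, logits, device='cuda', print_entropy=False):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    # a one-hot vector would select a single embedding row; softmaxing the
    # logits instead mixes all rows, weighted by probability
    probs = F.softmax(logits, dim=-1)
    if print_entropy:
        # assumption: the flag reports how "peaky" the soft prompt is
        print(Categorical(probs=probs).entropy().mean())
    probs = probs.to(device)
    return torch.matmul(probs, embedding.weight)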
danyaljj / gpt2_generation_embeddings.py
Created June 28, 2021 21:57
Querying GPT-2 with embeddings, instead of input ids
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F
def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    # typically we embed a one-hot vector. But here, since we work with dense
    # representations, we first softmax the logits so that they sum to one, like a one-hot vector.
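The preview ends at this comment; a minimal sketch of the querying step the gist's description refers to, using the inputs_embeds argument that GPT2LMHeadModel accepts in place of input_ids (the glue code is an assumption, and embed_inputs is assumed to be completed as sketched earlier):

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').to('cuda')

ids = tokenizer("Hello world", return_tensors="pt").input_ids.to('cuda')
# near-one-hot logits stand in for the usual hard token ids
logits = 1000.0 * F.one_hot(ids, num_classes=model.config.vocab_size).float()
inputs_embeds = embed_inputs(model.get_input_embeddings(), logits)

out = model(inputs_embeds=inputs_embeds)  # same forward pass, no input_ids needed
print(out.logits.shape)  # (1, sequence_length, vocab_size)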
# this works with transformers == 4.2.1
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F
def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
danyaljj / gpt2-decoding.py
Created June 22, 2021 22:44
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F
def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    probs = F.softmax(logits, dim=-1)
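The decoding loop itself is cut off. Below is a minimal greedy-decoding sketch built on the pieces above, feeding the model dense embeddings via embed_inputs rather than token ids (the prompt, loop structure, and length are assumptions, and embed_inputs is assumed to end as sketched earlier, returning torch.matmul(probs, embedding.weight)):

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').to('cuda')
model.eval()

ids = tokenizer("The dog", return_tensors="pt").input_ids.to('cuda')
# near-one-hot logits for the prompt tokens; embed_inputs softmaxes them back
past_logits = 1000.0 * F.one_hot(ids, num_classes=model.config.vocab_size).float()
for _ in range(20):
    embeds = embed_inputs(model.get_input_embeddings(), past_logits)
    with torch.no_grad():
        next_logits = model(inputs_embeds=embeds).logits[:, -1:, :]
    # greedily commit to the argmax token before appending
    next_onehot = 1000.0 * F.one_hot(next_logits.argmax(dim=-1), num_classes=model.config.vocab_size).float()
    past_logits = torch.cat([past_logits, next_onehot], dim=1)
print(tokenizer.decode(past_logits[0].argmax(dim=-1)))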
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F
from torch import nn
def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
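This variant also imports torch.nn, which hints at a trainable component. A hedged sketch of one way these pieces combine (treating the soft prompt as an nn.Parameter of free vocabulary logits and tuning it toward a target continuation is an assumption, not code from the gist; embed_inputs is again assumed to return torch.matmul(probs, embedding.weight)):

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').to('cuda')
for p in model.parameters():
    p.requires_grad_(False)  # only the soft prompt is trained

# a soft prompt: free logits over the vocabulary for 5 prompt positions
prompt_logits = nn.Parameter(torch.randn(1, 5, model.config.vocab_size, device='cuda'))
target_ids = tokenizer(" the capital of France is Paris", return_tensors="pt").input_ids.to('cuda')
optimizer = torch.optim.Adam([prompt_logits], lr=0.1)

for step in range(100):
    prompt_embeds = embed_inputs(model.get_input_embeddings(), prompt_logits)
    target_embeds = model.get_input_embeddings()(target_ids)
    embeds = torch.cat([prompt_embeds, target_embeds], dim=1)
    logits = model(inputs_embeds=embeds).logits
    # predict each target token from the position right before it
    n = target_ids.size(1)
    shift_logits = logits[:, -n - 1:-1, :]
    loss = F.cross_entropy(shift_logits.reshape(-1, shift_logits.size(-1)), target_ids.reshape(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()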