import statistics as st

def metric1(scores, row_aggregator, column_aggregator, cell_aggregator):
    # Aggregate each row's off-diagonal cells against its diagonal entry,
    # then aggregate the per-row values into a single score.
    row_values = []
    for row_idx, row1 in enumerate(scores):
        diagonal_x = row1[row_idx]
        row_values.append(
            column_aggregator(
                [cell_aggregator(diagonal_x, x, abs(col_idx - row_idx))
                 for col_idx, x in enumerate(row1) if col_idx != row_idx]
            )
        )
    return row_aggregator(row_values)
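
As a usage sketch (the score matrix and the aggregator choices below are illustrative, not from the gist): mean over columns and rows, with a distance-discounted difference per cell.

scores = [[1.0, 0.4, 0.2],
          [0.5, 1.0, 0.3],
          [0.1, 0.6, 1.0]]
print(metric1(scores,
              row_aggregator=st.mean,
              column_aggregator=st.mean,
              cell_aggregator=lambda diag, x, dist: (diag - x) / dist))
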
# Dump the ELI5 dataset splits to TSV files.
from datasets import load_dataset

dataset = load_dataset("eli5")
print(dataset)

path = "/Users/danielk/ideaProjects/qoogle-experiments/data"
trainfile = open(path + "/eli5/train.tsv", "w")
testfile = open(path + "/eli5/test.tsv", "w")
devfile = open(path + "/eli5/dev.tsv", "w")
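
A plausible continuation that writes one question/answer pair per line; the split names ('train_eli5', etc.) and fields ('title', 'answers') follow the Hugging Face eli5 dataset card, but treat them as assumptions:

for split, outfile in [("train_eli5", trainfile), ("test_eli5", testfile), ("validation_eli5", devfile)]:
    for ex in dataset[split]:
        question = ex["title"].replace("\t", " ").replace("\n", " ")
        answer = ex["answers"]["text"][0].replace("\t", " ").replace("\n", " ")  # top-listed answer
        outfile.write(question + "\t" + answer + "\n")

trainfile.close()
testfile.close()
devfile.close()
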
danyaljj / convert.py
Created July 12, 2021 22:50
convert.py
import json
from os import listdir
from os.path import isfile, join

tasks_path = '/Users/danielk/ideaProjects/instructions-demo/app/app/static/tasks/'
categories = {
    'task001_quoref_question_generation': 'Generation',
    'task002_quoref_answer_generation': 'Generation',
    'task003_mctaco_question_generation_event_duration': 'Question Generation',
    # ... (remaining task entries truncated in the gist preview)
}
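
A sketch of how the conversion might proceed from here: walk the task files and tag each with its category. Only the imports and variables above are from the gist; the output structure is hypothetical.

task_files = [f for f in listdir(tasks_path) if isfile(join(tasks_path, f)) and f.endswith(".json")]
for fname in task_files:
    with open(join(tasks_path, fname)) as fin:
        task = json.load(fin)
    task_name = fname[:-len(".json")]
    task["category"] = categories.get(task_name, "Unknown")  # hypothetical field
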
#!/usr/bin/env python
"""Evaluate model predictions against targets.

Usage:
    evaluate_predictions.py --model_mixture_name=NAME --dataset_mixture_name=NAME --bucket_name=GOOGLE_CLOUD_BUCKET_NAME --eval_metric=METRIC_NAME [--model_size=SIZE] [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py --eval_path=NAME --eval_metric=METRIC_NAME [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py -h | --help

Options:
    -h --help                  Show this screen.
    --model_mixture_name=NAME  Name of the model whose predictions are to be evaluated.
    (remaining options truncated in the gist preview)
"""
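
A docstring in this shape is what docopt parses; a minimal sketch of the argument handling (docopt itself is an assumption, since the gist's parsing code is not shown):

from docopt import docopt

if __name__ == "__main__":
    args = docopt(__doc__)
    eval_metric = args["--eval_metric"]
    eval_path = args["--eval_path"]  # None when the bucket-based invocation is used
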
import json
import os
import random
from tqdm import tqdm

with open("split.json") as f:
    split_ids = json.load(f)

all_questions = {}
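
A sketch of how the split map might be applied once all_questions is populated, assuming split_ids maps a split name to a list of question ids (the structure of split.json is not shown in the preview):

splits = {name: {} for name in split_ids}
for name, ids in split_ids.items():
    for qid in tqdm(ids):
        if qid in all_questions:
            splits[name][qid] = all_questions[qid]
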
# This file extracts the predictions of several existing summarization systems for the XSUM dataset.
import json
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

dataset = load_dataset('xsum')
total_len = len(dataset['test'])
batch_size = 16
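
A sketch of the batched generation loop the imports suggest; the checkpoint name ('facebook/bart-large-xsum') and generation settings are assumptions, not the gist's:

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-xsum")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-xsum")
predictions = []
for start in tqdm(range(0, total_len, batch_size)):
    batch = dataset["test"][start:start + batch_size]["document"]
    inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=60)
    predictions.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))
with open("xsum_predictions.json", "w") as f:
    json.dump(predictions, f)
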
danyaljj / gpt2_generation_embeddings.py
Created June 28, 2021 21:57
Querying GPT-2 with embeddings instead of input ids
# this works with transformers == 4.2.1
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F

def embed_inputs(embedding, logits, device='cuda'):
    '''
    Embeds inputs in a dense representation before passing them to the model.
    '''
    # Typically we embed a one-hot vector, but since we work with dense
    # representations here, we take a probability-weighted mixture of the
    # token embeddings (a plausible completion; the gist preview truncates here).
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight.to(device))
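
The gist's description, querying GPT-2 with embeddings instead of input ids, maps to the model's inputs_embeds argument; a minimal sketch of one such query (the near-one-hot logits trick is illustrative):

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
embedding = model.get_input_embeddings()
input_ids = tokenizer("Hello world", return_tensors="pt").input_ids
# near-one-hot logits, so embed_inputs approximately recovers the usual embeddings
logits = torch.log(F.one_hot(input_ids, num_classes=embedding.num_embeddings).float() + 1e-10)
out = model(inputs_embeds=embed_inputs(embedding, logits, device='cpu'))
print(out.logits.shape)  # (1, sequence_length, vocab_size)
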
danyaljj / gpt2-decoding.py
Created June 22, 2021 22:44
gpt2-decoding.py
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F

def embed_inputs(embedding, logits, device='cuda'):
    '''
    Embeds inputs in a dense representation before passing them to the model.
    '''
    probs = F.softmax(logits, dim=-1)
    # a plausible completion of the truncated preview: a probability-weighted
    # mixture of the token embeddings
    return torch.matmul(probs.to(device), embedding.weight.to(device))
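
A hedged sketch of a decoding loop built on this helper, feeding soft embeddings of the tokens generated so far back into the model (the prompt and the greedy update rule are illustrative, not the gist's):

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
emb = model.get_input_embeddings()
ids = tokenizer("The meaning of life is", return_tensors="pt").input_ids
logits = torch.log(F.one_hot(ids, num_classes=emb.num_embeddings).float() + 1e-10)
for _ in range(10):
    out = model(inputs_embeds=embed_inputs(emb, logits, device='cpu'))
    next_logits = out.logits[:, -1:, :]  # distribution over the next token
    logits = torch.cat([logits, next_logits], dim=1)
print(tokenizer.decode(logits.argmax(-1)[0]))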