import statistics as st

def metric1(scores, row_aggregator, column_aggregator, cell_aggregator):
    # Aggregate each row's off-diagonal cells against its diagonal entry,
    # then aggregate the per-row values into a single score.
    row_values = []
    for row_idx, row1 in enumerate(scores):
        diagonal_x = row1[row_idx]
        row_values.append(
            column_aggregator(
                [cell_aggregator(diagonal_x, x, abs(col_idx - row_idx))
                 for col_idx, x in enumerate(row1) if col_idx != row_idx]
            )
        )
    return row_aggregator(row_values)
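
As a usage sketch (the score matrix and the aggregator choices below are illustrative, not from the gist): mean over columns and rows, with a distance-discounted difference per cell.

scores = [[1.0, 0.4, 0.2],
          [0.5, 1.0, 0.3],
          [0.1, 0.6, 1.0]]
print(metric1(scores,
              row_aggregator=st.mean,
              column_aggregator=st.mean,
              cell_aggregator=lambda diag, x, dist: (diag - x) / dist))
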
# Dump the ELI5 dataset splits to TSV files.
from datasets import load_dataset

dataset = load_dataset("eli5")
print(dataset)

path = "/Users/danielk/ideaProjects/qoogle-experiments/data"
trainfile = open(path + "/eli5/train.tsv", "w")
testfile = open(path + "/eli5/test.tsv", "w")
devfile = open(path + "/eli5/dev.tsv", "w")
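
A plausible continuation that writes one question/answer pair per line; the split names ('train_eli5', etc.) and fields ('title', 'answers') follow the Hugging Face eli5 dataset card, but treat them as assumptions:

for split, outfile in [("train_eli5", trainfile), ("test_eli5", testfile), ("validation_eli5", devfile)]:
    for ex in dataset[split]:
        question = ex["title"].replace("\t", " ").replace("\n", " ")
        answer = ex["answers"]["text"][0].replace("\t", " ").replace("\n", " ")  # top-listed answer
        outfile.write(question + "\t" + answer + "\n")

trainfile.close()
testfile.close()
devfile.close()
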
danyaljj / convert.py
Created July 12, 2021 22:50
convert.py
import json
from os import listdir
from os.path import isfile, join

tasks_path = '/Users/danielk/ideaProjects/instructions-demo/app/app/static/tasks/'
categories = {
    'task001_quoref_question_generation': 'Generation',
    'task002_quoref_answer_generation': 'Generation',
    'task003_mctaco_question_generation_event_duration': 'Question Generation',
    # ... (remaining task entries truncated in the gist preview)
}
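
A sketch of how the conversion might proceed from here: walk the task files and tag each with its category. Only the imports and variables above are from the gist; the output structure is hypothetical.

task_files = [f for f in listdir(tasks_path) if isfile(join(tasks_path, f)) and f.endswith(".json")]
for fname in task_files:
    with open(join(tasks_path, fname)) as fin:
        task = json.load(fin)
    task_name = fname[:-len(".json")]
    task["category"] = categories.get(task_name, "Unknown")  # hypothetical field
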
#!/usr/bin/env python
"""Evaluate model predictions against targets.

Usage:
    evaluate_predictions.py --model_mixture_name=NAME --dataset_mixture_name=NAME --bucket_name=GOOGLE_CLOUD_BUCKET_NAME --eval_metric=METRIC_NAME [--model_size=SIZE] [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py --eval_path=NAME --eval_metric=METRIC_NAME [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py -h | --help

Options:
    -h --help                  Show this screen.
    --model_mixture_name=NAME  Name of the model whose predictions are to be evaluated.
    (remaining options truncated in the gist preview)
"""
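
A docstring in this shape is what docopt parses; a minimal sketch of the argument handling (docopt itself is an assumption, since the gist's parsing code is not shown):

from docopt import docopt

if __name__ == "__main__":
    args = docopt(__doc__)
    eval_metric = args["--eval_metric"]
    eval_path = args["--eval_path"]  # None when the bucket-based invocation is used
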
import json
import os
import random
from tqdm import tqdm

with open("split.json") as f:
    split_ids = json.load(f)

all_questions = {}
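
A sketch of how the split map might be applied once all_questions is populated, assuming split_ids maps a split name to a list of question ids (the structure of split.json is not shown in the preview):

splits = {name: {} for name in split_ids}
for name, ids in split_ids.items():
    for qid in tqdm(ids):
        if qid in all_questions:
            splits[name][qid] = all_questions[qid]
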
# This file extracts the predictions of several existing summarization systems for the XSUM dataset.
import json
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

dataset = load_dataset('xsum')
total_len = len(dataset['test'])
batch_size = 16
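
A sketch of the batched generation loop the imports suggest; the checkpoint name ('facebook/bart-large-xsum') and generation settings are assumptions, not the gist's:

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-xsum")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-xsum")
predictions = []
for start in tqdm(range(0, total_len, batch_size)):
    batch = dataset["test"][start:start + batch_size]["document"]
    inputs = tokenizer(batch, truncation=True, padding=True, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=60)
    predictions.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))
with open("xsum_predictions.json", "w") as f:
    json.dump(predictions, f)
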
danyaljj / gpt2_generation_embeddings.py
Created June 28, 2021 21:57
Querying GPT-2 with embeddings instead of input ids
# this works with transformers == 4.2.1
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F

def embed_inputs(embedding, logits, device='cuda'):
    '''
    Embeds inputs in a dense representation before passing them to the model.
    '''
    # Typically we embed a one-hot vector, but since we work with dense
    # representations here, we take a probability-weighted mixture of the
    # token embeddings (a plausible completion; the gist preview truncates here).
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight.to(device))
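
The gist's description, querying GPT-2 with embeddings instead of input ids, maps to the model's inputs_embeds argument; a minimal sketch of one such query (the near-one-hot logits trick is illustrative):

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
embedding = model.get_input_embeddings()
input_ids = tokenizer("Hello world", return_tensors="pt").input_ids
# near-one-hot logits, so embed_inputs approximately recovers the usual embeddings
logits = torch.log(F.one_hot(input_ids, num_classes=embedding.num_embeddings).float() + 1e-10)
out = model(inputs_embeds=embed_inputs(embedding, logits, device='cpu'))
print(out.logits.shape)  # (1, sequence_length, vocab_size)
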
danyaljj / gpt2-decoding.py
Created June 22, 2021 22:44
gpt2-decoding.py
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F

def embed_inputs(embedding, logits, device='cuda'):
    '''
    Embeds inputs in a dense representation before passing them to the model.
    '''
    probs = F.softmax(logits, dim=-1)
    # a plausible completion of the truncated preview: a probability-weighted
    # mixture of the token embeddings
    return torch.matmul(probs.to(device), embedding.weight.to(device))
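
A hedged sketch of a decoding loop built on this helper, feeding soft embeddings of the tokens generated so far back into the model (the prompt and the greedy update rule are illustrative, not the gist's):

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")
emb = model.get_input_embeddings()
ids = tokenizer("The meaning of life is", return_tensors="pt").input_ids
logits = torch.log(F.one_hot(ids, num_classes=emb.num_embeddings).float() + 1e-10)
for _ in range(10):
    out = model(inputs_embeds=embed_inputs(emb, logits, device='cpu'))
    next_logits = out.logits[:, -1:, :]  # distribution over the next token
    logits = torch.cat([logits, next_logits], dim=1)
print(tokenizer.decode(logits.argmax(-1)[0]))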