#!/usr/bin/env python
"""Evaluate model predictions against targets.
Usage:
    evaluate_predictions.py --model_mixture_name=NAME --dataset_mixture_name=NAME --bucket_name=GOOGLE_CLOUD_BUCKET_NAME --eval_metric=METRIC_NAME [--model_size=SIZE] [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py --eval_path=NAME --eval_metric=METRIC_NAME [--input_sequence_length=LEN] [--output_sequence_length=LEN]
    evaluate_predictions.py -h | --help
Options:
    -h --help                        Show this screen
    --model_mixture_name=NAME        Name of the model whose predictions are to be evaluated
    --dataset_mixture_name=NAME      Name of the dataset mixture to evaluate on
    --bucket_name=GOOGLE_CLOUD_BUCKET_NAME  Google Cloud bucket holding the predictions
    --eval_metric=METRIC_NAME        Metric to compute
    --eval_path=NAME                 Path to a predictions file to evaluate directly
    --model_size=SIZE                Size of the model
    --input_sequence_length=LEN      Input sequence length
    --output_sequence_length=LEN     Output sequence length
"""
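The usage block above is docopt-style. A minimal sketch of the matching entry point follows, assuming the docopt package; the original script may parse its arguments differently.

from docopt import docopt

if __name__ == "__main__":
    args = docopt(__doc__)  # parses sys.argv against the Usage: patterns above
    eval_metric = args["--eval_metric"]
    eval_path = args["--eval_path"]  # None when the bucket-based form is used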
import json
import os
import random

from tqdm import tqdm

# split.json is assumed to map each split name to the question ids it contains
with open("split.json") as f:
    split_ids = json.load(f)

all_questions = {}
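The preview ends before split_ids or all_questions is used. A purely hypothetical continuation, assuming split.json maps split names (e.g. "train", "dev", "test") to lists of question ids and that all_questions is later populated with id-to-question mappings:

# hypothetical: partition the collected questions by the ids in split.json
splits = {}
for name, ids in split_ids.items():
    id_set = set(ids)
    splits[name] = {qid: q for qid, q in all_questions.items() if qid in id_set}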
# This file extracts the predictions of several existing summarization systems on the XSUM dataset.
import json

from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

dataset = load_dataset('xsum')
total_len = len(dataset['test'])
batch_size = 16
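The preview cuts off before a model is loaded. A minimal sketch of how these pieces are typically wired together follows; the checkpoint name (sshleifer/distilbart-xsum-12-6, a public XSUM summarizer) and the output path are illustrative assumptions, not taken from the original file.

tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-xsum-12-6")
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-xsum-12-6")

predictions = []
for start in tqdm(range(0, total_len, batch_size)):
    batch = dataset['test'][start:start + batch_size]  # slicing yields a dict of columns
    inputs = tokenizer(batch['document'], truncation=True, padding=True, return_tensors='pt')
    summary_ids = model.generate(**inputs, max_length=60)
    predictions.extend(tokenizer.batch_decode(summary_ids, skip_special_tokens=True))

with open('xsum_predictions.json', 'w') as f:
    json.dump(predictions, f)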
from torch.distributions import Categorical
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda', print_entropy=False):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    # reconstruction of the truncated body: instead of embedding one-hot token
    # ids, mix the embedding rows with softmax weights over the vocabulary
    probs = F.softmax(logits, dim=-1)
    if print_entropy:
        print(Categorical(probs=probs).entropy().mean())
    return torch.matmul(probs.to(device), embedding.weight)
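A hypothetical usage sketch for embed_inputs: feed the dense mixture to GPT-2 through inputs_embeds instead of discrete input_ids (assumes a CUDA device, matching the function's default).

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').to('cuda')

ids = tokenizer("Hello world", return_tensors='pt').input_ids.to('cuda')
# scaled one-hot logits make the softmax mixture collapse to ordinary embeddings,
# which is a handy sanity check for the dense path
one_hot = F.one_hot(ids, num_classes=model.config.vocab_size).float() * 1000.0
dense = embed_inputs(model.get_input_embeddings(), one_hot, device='cuda')
out = model(inputs_embeds=dense)
print(out.logits.shape)  # (1, seq_len, vocab_size)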
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    # typically we embed a one-hot vector, but since we work with dense
    # representations here, we mix the embedding rows with softmax weights
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)
# this works with transformers == 4.2.1
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F
from torch import nn


def embed_inputs(embedding, logits, device='cuda'):
    '''
    embeds inputs in a dense representation, before passing them to the model
    '''
    probs = F.softmax(logits, dim=-1)
    return torch.matmul(probs.to(device), embedding.weight)