
Priyanka Dwivedi priya-dwivedi

  • Deep Learning Consultant
  • Toronto, Canada
priya-dwivedi / gec_model_inference.py
Created April 5, 2022 01:06
Inference using GEC model
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the grammar-correction model and move it to GPU if available
model_name = 'deep-learning-analytics/GrammarCorrector'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(torch_device)

def correct_grammar(input_text, num_return_sequences):
    # Tokenize, generate corrections with beam search, and decode the candidates
    batch = tokenizer([input_text], truncation=True, padding='max_length',
                      max_length=64, return_tensors="pt").to(torch_device)
    translated = model.generate(**batch, max_length=64, num_beams=4,
                                num_return_sequences=num_return_sequences, temperature=1.5)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)
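A quick usage sketch; the example sentence and sequence count below are illustrative, not from the gist:

corrections = correct_grammar('He are going to school yesterday.', num_return_sequences=2)
print(corrections)  # two beam-search candidates with the grammar fixed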
priya-dwivedi / gec_trainer.py
Created April 5, 2022 01:05
Training Arguments for GEC
# defining training related arguments
from transformers import Seq2SeqTrainingArguments

batch_size = 16
args = Seq2SeqTrainingArguments(
    output_dir="/content/drive/MyDrive/c4_200m/weights",
    evaluation_strategy="steps",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=2e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    save_total_limit=2,
)
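These arguments would typically feed a Seq2SeqTrainer. A minimal sketch, assuming the model, tokenizer, and GrammarDataset instances from the neighbouring gists are in scope (the trainer wiring is an assumption, not shown in the preview):

from transformers import Seq2SeqTrainer, DataCollatorForSeq2Seq

# train_dataset / eval_dataset are assumed to be GrammarDataset instances
trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
trainer.train()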
priya-dwivedi / gec_tokenizer.py
Created April 5, 2022 01:03
Tokenizer for GEC model
from torch.utils.data import Dataset

class GrammarDataset(Dataset):
    def __init__(self, dataset, tokenizer, print_text=False):
        self.dataset = dataset
        self.pad_to_max_length = False
        self.tokenizer = tokenizer
        self.print_text = print_text
        self.max_len = 64

    def __len__(self):
        return len(self.dataset)
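The preview cuts off before __getitem__. A sketch of what it plausibly does, written as a continuation of the class; the 'input'/'output' column names are assumptions, not confirmed by the preview:

    def __getitem__(self, index):
        example = self.dataset[index]
        if self.print_text:
            print(example['input'])
        # Tokenize source and target to the same fixed length
        source = self.tokenizer(example['input'], max_length=self.max_len,
                                padding='max_length', truncation=True, return_tensors="pt")
        target = self.tokenizer(example['output'], max_length=self.max_len,
                                padding='max_length', truncation=True, return_tensors="pt")
        return {'input_ids': source['input_ids'].squeeze(),
                'attention_mask': source['attention_mask'].squeeze(),
                'labels': target['input_ids'].squeeze()}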
priya-dwivedi / triviaqa-t5-inference.py
Created September 30, 2020 18:31
Inference using Auto LM head
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead  # newer transformers versions use AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("deep-learning-analytics/triviaqa-t5-base")
model = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/triviaqa-t5-base")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

text = "Who directed the movie Jaws?"
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead

tokenizer = AutoTokenizer.from_pretrained("deep-learning-analytics/wikihow-t5-small")
model = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/wikihow-t5-small")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# The sample article is truncated in the gist preview
text = """
Lack of fluids can lead to dry mouth, which is a leading cause of bad breath. Water
"""
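A hedged generation sketch for the summarizer; the preprocessing and decoding settings are assumptions, not from the gist:

preprocessed = text.strip().replace("\n", " ")
tokenized = tokenizer(preprocessed, return_tensors="pt", truncation=True).to(device)
summary_ids = model.generate(**tokenized, num_beams=4, min_length=30,
                             max_length=150, early_stopping=True)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))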
priya-dwivedi / wikihow-t5-tuner.py
Created September 5, 2020 13:01
Wikihow T5 Tuner
import pytorch_lightning as pl
from transformers import T5Tokenizer, T5ForConditionalGeneration
from nlp import load_metric  # the 'nlp' library was later renamed 'datasets'

class T5FineTuner(pl.LightningModule):
    def __init__(self, hparams):
        super(T5FineTuner, self).__init__()
        self.hparams = hparams  # newer Lightning versions require self.save_hyperparameters(hparams)
        self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)
        self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)
        self.rouge_metric = load_metric('rouge')

        if self.hparams.freeze_embeds:
            self.freeze_embeds()
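freeze_embeds is not shown in the preview. A plausible sketch, continuing the class, that freezes the shared token embeddings and the per-stack embedding tables; the helper's body is an assumption:

    def freeze_params(self, model):
        # Stop gradients for every parameter of the given module
        for par in model.parameters():
            par.requires_grad = False

    def freeze_embeds(self):
        self.freeze_params(self.model.shared)
        for block in [self.model.encoder, self.model.decoder]:
            self.freeze_params(block.embed_tokens)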
priya-dwivedi / wikihow-dataset.py
Created September 2, 2020 18:52
Pytorch Dataset for Wikihow
from torch.utils.data import Dataset
from nlp import load_dataset

class wikihow(Dataset):
    def __init__(self, tokenizer, type_path, num_samples, input_length, output_length, print_text=False):
        self.dataset = load_dataset('wikihow', 'all', data_dir='data/', split=type_path)
        if num_samples:
            self.dataset = self.dataset.select(list(range(0, num_samples)))
        self.input_length = input_length
        self.tokenizer = tokenizer
        self.output_length = output_length
        self.print_text = print_text
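A usage sketch; the tokenizer choice and lengths are illustrative values, not from the gist:

from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained('t5-small')
train_ds = wikihow(tokenizer, type_path='train', num_samples=1000,
                   input_length=512, output_length=150)
print(len(train_ds))  # 1000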
priya-dwivedi / load_wikihow.py
Created September 2, 2020 15:58
T5 Load WikiHow Data
## Load Data from NLP Library (since renamed 'datasets')
from nlp import load_dataset

# WikiHow requires the raw CSVs to be downloaded manually into data_dir first
dataset = load_dataset('wikihow', 'all', data_dir='data/')
print(dataset.keys())
print("Size of train dataset: ", dataset['train'].shape)
print("Size of validation dataset: ", dataset['validation'].shape)

## Look at Sample Examples
print(dataset['train'][0].keys())
print("Example of text: ", dataset['train'][0]['text'])
print("Example of summary: ", dataset['train'][0]['headline'])
priya-dwivedi / detr_class.py
Created June 8, 2020 17:06
DETR inference block
import torch
from torch import nn
from torchvision.models import resnet50

class DETRdemo(nn.Module):
    """
    Demo DETR implementation.

    Demo implementation of DETR in a minimal number of lines, with the
    following differences w.r.t. DETR in the paper:
    * learned positional encoding (instead of sine)
    * positional encoding is passed at input (instead of attention)
    * fc bbox predictor (instead of MLP)

    The model achieves ~40 AP on COCO val5k and runs at ~28 FPS on Tesla V100.
    """
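    # The preview ends at the docstring. A sketch of the constructor, following
    # the public DETR demo notebook this gist matches; the defaults below are
    # that notebook's values, not confirmed by the preview itself.
    def __init__(self, num_classes, hidden_dim=256, nheads=8,
                 num_encoder_layers=6, num_decoder_layers=6):
        super().__init__()
        # ResNet-50 backbone with the classification head removed
        self.backbone = resnet50()
        del self.backbone.fc
        # 1x1 conv reduces 2048 backbone channels to the transformer width
        self.conv = nn.Conv2d(2048, hidden_dim, 1)
        self.transformer = nn.Transformer(hidden_dim, nheads,
                                          num_encoder_layers, num_decoder_layers)
        # prediction heads: one extra class for "no object"
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)
        # learned object queries and learned 2D positional encodings
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))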
priya-dwivedi / traverse_kg.py
Created May 28, 2020 15:37
Traverse a Knowledge Graph
def get_max_degree_node(list_of_nodes_to_eliminate, G):
    # Return the highest-degree node in G after excluding the eliminated nodes
    max_degree = 0
    all_remaining_nodes = [x for x in G.nodes() if x not in list_of_nodes_to_eliminate]
    max_node = all_remaining_nodes[0]
    for node in all_remaining_nodes:
        degree = G.degree(node)
        if degree > max_degree:
            max_degree = degree
            max_node = node
    return max_degree, max_node
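A usage sketch with networkx; the toy graph and node names are illustrative:

import networkx as nx

# Toy knowledge graph
G = nx.Graph()
G.add_edges_from([("Jaws", "Spielberg"), ("Jaws", "shark"),
                  ("Spielberg", "E.T."), ("Spielberg", "Jurassic Park")])

degree, node = get_max_degree_node(["shark"], G)
print(degree, node)  # 3 Spielberg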