This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Grammar-correction setup: load the fine-tuned T5 "GrammarCorrector"
# checkpoint and place it on the GPU when one is available.
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

model_name = 'deep-learning-analytics/GrammarCorrector'
# Prefer CUDA; fall back to CPU so the script also runs on GPU-less hosts.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(torch_device)
def correct_grammar(input_text, num_return_sequences):
    """Run the grammar-correction model on *input_text*.

    Tokenizes the text (padded/truncated to 64 tokens), then beam-searches
    the model for *num_return_sequences* candidate corrections.

    NOTE(review): this snippet appears truncated -- the generated token ids
    in ``translated`` are never decoded or returned, so the function
    currently returns None; confirm against the full original source.
    """
    encoded = tokenizer(
        [input_text],
        truncation=True,
        padding='max_length',
        max_length=64,
        return_tensors="pt",
    ).to(torch_device)
    translated = model.generate(
        **encoded,
        max_length=64,
        num_beams=4,
        num_return_sequences=num_return_sequences,
        temperature=1.5,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# defining training related arguments | |
batch_size = 16 | |
args = Seq2SeqTrainingArguments(output_dir="/content/drive/MyDrive/c4_200m/weights", | |
evaluation_strategy="steps", | |
per_device_train_batch_size=batch_size, | |
per_device_eval_batch_size=batch_size, | |
learning_rate=2e-5, | |
num_train_epochs=1, | |
weight_decay=0.01, | |
save_total_limit=2, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class GrammarDataset(Dataset):
    """Dataset wrapper pairing a grammar-correction corpus with a tokenizer."""

    def __init__(self, dataset, tokenizer, print_text=False):
        # Keep references to the underlying dataset and tokenizer.
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.print_text = print_text
        # Sequences are capped at 64 tokens; padding is applied dynamically
        # rather than always to max length.
        self.pad_to_max_length = False
        self.max_len = 64

    def __len__(self):
        # Size is delegated to the wrapped dataset.
        return len(self.dataset)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Question answering with a T5 model fine-tuned on TriviaQA.
from transformers import AutoTokenizer, AutoModelWithLMHead

# NOTE(review): AutoModelWithLMHead is deprecated in recent transformers
# releases (AutoModelForSeq2SeqLM is the modern equivalent) -- confirm the
# pinned transformers version before changing it.
tokenizer = AutoTokenizer.from_pretrained("deep-learning-analytics/triviaqa-t5-base")
model = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/triviaqa-t5-base")

# Move the model to the first GPU when available, otherwise stay on CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

text = "Who directed the movie Jaws?"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Summarization with a T5-small model fine-tuned on the WikiHow corpus.
from transformers import AutoTokenizer, AutoModelWithLMHead

# NOTE(review): AutoModelWithLMHead is deprecated in recent transformers
# releases -- confirm the pinned transformers version before changing it.
tokenizer = AutoTokenizer.from_pretrained("deep-learning-analytics/wikihow-t5-small")
model = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/wikihow-t5-small")

# Run on the first GPU when available; fall back to CPU otherwise.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
text = """" | |
Lack of fluids can lead to dry mouth, which is a leading cause of bad breath. Water |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class T5FineTuner(pl.LightningModule):
    """PyTorch Lightning module wrapping a T5 model for fine-tuning."""

    def __init__(self, hparams):
        super().__init__()
        # NOTE(review): assigning to self.hparams directly is rejected by
        # newer PyTorch Lightning versions (use save_hyperparameters());
        # kept as-is to match the original behaviour on the pinned version.
        self.hparams = hparams
        # Model and tokenizer may come from different checkpoints.
        self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)
        self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)
        # ROUGE is the evaluation metric for summarization quality.
        self.rouge_metric = load_metric('rouge')
        # Optionally freeze embedding layers to cut trainable parameters.
        if self.hparams.freeze_embeds:
            self.freeze_embeds()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class wikihow(Dataset):
    """WikiHow summarization dataset wrapper.

    Loads the requested split via load_dataset (raw files expected under
    data/) and records the tokenizer plus sequence-length limits used later
    when encoding examples.
    """

    def __init__(self, tokenizer, type_path, num_samples, input_length, output_length, print_text=False):
        self.dataset = load_dataset('wikihow', 'all', data_dir='data/', split=type_path)
        # A truthy num_samples restricts the split to its first examples.
        if num_samples:
            self.dataset = self.dataset.select(list(range(num_samples)))
        self.tokenizer = tokenizer
        self.input_length = input_length
        self.output_length = output_length
        self.print_text = print_text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Load Data from NLP Library
from nlp import load_dataset

# Download/prepare the full WikiHow dataset (raw files expected under data/).
dataset = load_dataset('wikihow', 'all', data_dir='data/')
print(dataset.keys())
print("Size of train dataset: ", dataset['train'].shape)
print("Size of Validation dataset: ", dataset['validation'].shape)

## Look at Sample Examples
sample = dataset['train'][0]
print(sample.keys())
print(" Example of text: ", sample['text'])
print(" Example of Summary: ", sample['headline'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class DETRdemo(nn.Module): | |
""" | |
Demo DETR implementation. | |
Demo implementation of DETR in minimal number of lines, with the | |
following differences wrt DETR in the paper: | |
* learned positional encoding (instead of sine) | |
* positional encoding is passed at input (instead of attention) | |
* fc bbox predictor (instead of MLP) | |
The model achieves ~40 AP on COCO val5k and runs at ~28 FPS on Tesla V100. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_max_degree_node(list_of_nodes_to_eliminate, G):
    """Return ``(degree, node)`` for the highest-degree node of *G* that is
    not in *list_of_nodes_to_eliminate*.

    Ties are broken in favour of the node that appears first in
    ``G.nodes()`` (same rule as the original strictly-greater update loop).

    Raises:
        ValueError: if every node of G has been eliminated (the original
            crashed with an opaque IndexError in this case).
    """
    # Set lookup turns each membership test from O(m) into O(1), making the
    # filter O(n + m) instead of O(n * m).
    eliminated = set(list_of_nodes_to_eliminate)
    remaining = [node for node in G.nodes() if node not in eliminated]
    if not remaining:
        raise ValueError("all nodes have been eliminated; none remain")
    # max() returns the first node with the maximal degree, matching the
    # original's first-winner tie-breaking.
    max_node = max(remaining, key=G.degree)
    return G.degree(max_node), max_node
NewerOlder