
Priyanka Dwivedi priya-dwivedi

  • Deep Learning Consultant
  • Toronto, Canada
priya-dwivedi / gec_model_inference.py
Created April 5, 2022 01:06
Inference using GEC model
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the grammar-correction model and move it to GPU if available
model_name = 'deep-learning-analytics/GrammarCorrector'
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name).to(torch_device)

def correct_grammar(input_text, num_return_sequences):
    # Tokenize, generate corrections with beam search, and decode the candidates
    batch = tokenizer([input_text], truncation=True, padding='max_length',
                      max_length=64, return_tensors="pt").to(torch_device)
    translated = model.generate(**batch, max_length=64, num_beams=4,
                                num_return_sequences=num_return_sequences, temperature=1.5)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)
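A quick usage sketch; the example sentence and sequence count below are illustrative, not from the gist:

corrections = correct_grammar('He are going to school yesterday.', num_return_sequences=2)
print(corrections)  # two beam-search candidates with the grammar fixed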
priya-dwivedi / gec_trainer.py
Created April 5, 2022 01:05
Training Arguments for GEC
# defining training related arguments
from transformers import Seq2SeqTrainingArguments

batch_size = 16
args = Seq2SeqTrainingArguments(
    output_dir="/content/drive/MyDrive/c4_200m/weights",
    evaluation_strategy="steps",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    learning_rate=2e-5,
    num_train_epochs=1,
    weight_decay=0.01,
    save_total_limit=2,
)
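These arguments would typically feed a Seq2SeqTrainer. A minimal sketch, assuming the model, tokenizer, and GrammarDataset instances from the neighbouring gists are in scope (the trainer wiring is an assumption, not shown in the preview):

from transformers import Seq2SeqTrainer, DataCollatorForSeq2Seq

# train_dataset / eval_dataset are assumed to be GrammarDataset instances
trainer = Seq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
trainer.train()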
priya-dwivedi / gec_tokenizer.py
Created April 5, 2022 01:03
Tokenizer for GEC model
from torch.utils.data import Dataset

class GrammarDataset(Dataset):
    def __init__(self, dataset, tokenizer, print_text=False):
        self.dataset = dataset
        self.pad_to_max_length = False
        self.tokenizer = tokenizer
        self.print_text = print_text
        self.max_len = 64

    def __len__(self):
        return len(self.dataset)
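The preview cuts off before __getitem__. A sketch of what it plausibly does, written as a continuation of the class; the 'input'/'output' column names are assumptions, not confirmed by the preview:

    def __getitem__(self, index):
        example = self.dataset[index]
        if self.print_text:
            print(example['input'])
        # Tokenize source and target to the same fixed length
        source = self.tokenizer(example['input'], max_length=self.max_len,
                                padding='max_length', truncation=True, return_tensors="pt")
        target = self.tokenizer(example['output'], max_length=self.max_len,
                                padding='max_length', truncation=True, return_tensors="pt")
        return {'input_ids': source['input_ids'].squeeze(),
                'attention_mask': source['attention_mask'].squeeze(),
                'labels': target['input_ids'].squeeze()}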
priya-dwivedi / triviaqa-t5-inference.py
Created September 30, 2020 18:31
Inference using Auto LM head
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead  # newer transformers versions use AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("deep-learning-analytics/triviaqa-t5-base")
model = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/triviaqa-t5-base")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

text = "Who directed the movie Jaws?"
import torch
from transformers import AutoTokenizer, AutoModelWithLMHead

tokenizer = AutoTokenizer.from_pretrained("deep-learning-analytics/wikihow-t5-small")
model = AutoModelWithLMHead.from_pretrained("deep-learning-analytics/wikihow-t5-small")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# The sample article is truncated in the gist preview
text = """
Lack of fluids can lead to dry mouth, which is a leading cause of bad breath. Water
"""
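A hedged generation sketch for the summarizer; the preprocessing and decoding settings are assumptions, not from the gist:

preprocessed = text.strip().replace("\n", " ")
tokenized = tokenizer(preprocessed, return_tensors="pt", truncation=True).to(device)
summary_ids = model.generate(**tokenized, num_beams=4, min_length=30,
                             max_length=150, early_stopping=True)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))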
priya-dwivedi / wikihow-t5-tuner.py
Created September 5, 2020 13:01
Wikihow T5 Tuner
import pytorch_lightning as pl
from transformers import T5Tokenizer, T5ForConditionalGeneration
from nlp import load_metric  # the 'nlp' library was later renamed 'datasets'

class T5FineTuner(pl.LightningModule):
    def __init__(self, hparams):
        super(T5FineTuner, self).__init__()
        self.hparams = hparams  # newer Lightning versions require self.save_hyperparameters(hparams)
        self.model = T5ForConditionalGeneration.from_pretrained(hparams.model_name_or_path)
        self.tokenizer = T5Tokenizer.from_pretrained(hparams.tokenizer_name_or_path)
        self.rouge_metric = load_metric('rouge')

        if self.hparams.freeze_embeds:
            self.freeze_embeds()
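freeze_embeds is not shown in the preview. A plausible sketch, continuing the class, that freezes the shared token embeddings and the per-stack embedding tables; the helper's body is an assumption:

    def freeze_params(self, model):
        # Stop gradients for every parameter of the given module
        for par in model.parameters():
            par.requires_grad = False

    def freeze_embeds(self):
        self.freeze_params(self.model.shared)
        for block in [self.model.encoder, self.model.decoder]:
            self.freeze_params(block.embed_tokens)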
priya-dwivedi / wikihow-dataset.py
Created September 2, 2020 18:52
Pytorch Dataset for Wikihow
from torch.utils.data import Dataset
from nlp import load_dataset

class wikihow(Dataset):
    def __init__(self, tokenizer, type_path, num_samples, input_length, output_length, print_text=False):
        self.dataset = load_dataset('wikihow', 'all', data_dir='data/', split=type_path)
        if num_samples:
            self.dataset = self.dataset.select(list(range(0, num_samples)))
        self.input_length = input_length
        self.tokenizer = tokenizer
        self.output_length = output_length
        self.print_text = print_text
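A usage sketch; the tokenizer choice and lengths are illustrative values, not from the gist:

from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained('t5-small')
train_ds = wikihow(tokenizer, type_path='train', num_samples=1000,
                   input_length=512, output_length=150)
print(len(train_ds))  # 1000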
priya-dwivedi / load_wikihow.py
Created September 2, 2020 15:58
T5 Load WikiHow Data
## Load Data from NLP Library (since renamed 'datasets')
from nlp import load_dataset

# WikiHow requires the raw CSVs to be downloaded manually into data_dir first
dataset = load_dataset('wikihow', 'all', data_dir='data/')
print(dataset.keys())
print("Size of train dataset: ", dataset['train'].shape)
print("Size of validation dataset: ", dataset['validation'].shape)

## Look at Sample Examples
print(dataset['train'][0].keys())
print("Example of text: ", dataset['train'][0]['text'])
print("Example of summary: ", dataset['train'][0]['headline'])
priya-dwivedi / detr_class.py
Created June 8, 2020 17:06
DETR inference block
import torch
from torch import nn
from torchvision.models import resnet50

class DETRdemo(nn.Module):
    """
    Demo DETR implementation.

    Demo implementation of DETR in a minimal number of lines, with the
    following differences w.r.t. DETR in the paper:
    * learned positional encoding (instead of sine)
    * positional encoding is passed at input (instead of attention)
    * fc bbox predictor (instead of MLP)

    The model achieves ~40 AP on COCO val5k and runs at ~28 FPS on Tesla V100.
    """
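    # The preview ends at the docstring. A sketch of the constructor, following
    # the public DETR demo notebook this gist matches; the defaults below are
    # that notebook's values, not confirmed by the preview itself.
    def __init__(self, num_classes, hidden_dim=256, nheads=8,
                 num_encoder_layers=6, num_decoder_layers=6):
        super().__init__()
        # ResNet-50 backbone with the classification head removed
        self.backbone = resnet50()
        del self.backbone.fc
        # 1x1 conv reduces 2048 backbone channels to the transformer width
        self.conv = nn.Conv2d(2048, hidden_dim, 1)
        self.transformer = nn.Transformer(hidden_dim, nheads,
                                          num_encoder_layers, num_decoder_layers)
        # prediction heads: one extra class for "no object"
        self.linear_class = nn.Linear(hidden_dim, num_classes + 1)
        self.linear_bbox = nn.Linear(hidden_dim, 4)
        # learned object queries and learned 2D positional encodings
        self.query_pos = nn.Parameter(torch.rand(100, hidden_dim))
        self.row_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))
        self.col_embed = nn.Parameter(torch.rand(50, hidden_dim // 2))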
priya-dwivedi / traverse_kg.py
Created May 28, 2020 15:37
Traverse a Knowledge Graph
def get_max_degree_node(list_of_nodes_to_eliminate, G):
    # Return the highest-degree node in G after excluding the eliminated nodes
    max_degree = 0
    all_remaining_nodes = [x for x in G.nodes() if x not in list_of_nodes_to_eliminate]
    max_node = all_remaining_nodes[0]
    for node in all_remaining_nodes:
        degree = G.degree(node)
        if degree > max_degree:
            max_degree = degree
            max_node = node
    return max_degree, max_node
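A usage sketch with networkx; the toy graph and node names are illustrative:

import networkx as nx

# Toy knowledge graph
G = nx.Graph()
G.add_edges_from([("Jaws", "Spielberg"), ("Jaws", "shark"),
                  ("Spielberg", "E.T."), ("Spielberg", "Jurassic Park")])

degree, node = get_max_degree_node(["shark"], G)
print(degree, node)  # 3 Spielberg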