import os
import re
import numpy as np
import tensorflow as tf

model = MyPyTorchGPT2()  # load the uninitialized PyTorch model we have created

# Retrieve weights from the TF checkpoint
tf_path = os.path.abspath(gpt2_checkpoint_path)
init_vars = tf.train.list_variables(tf_path)
tf_vars = []
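# A sketch of how the loading loop could continue: read each variable from the
# checkpoint and collect (name, array) pairs; the squeeze() drops TF's
# singleton dimensions. The exact post-processing is an assumption here.
for name, shape in init_vars:
    print("Loading TF weight {} with shape {}".format(name, shape))
    array = tf.train.load_variable(tf_path, name)
    tf_vars.append((name, array.squeeze()))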
# We will use 5 special tokens:
# - <bos> to indicate the start of the sequence
# - <eos> to indicate the end of the sequence
# - <speaker1> to indicate the beginning and the tokens of an utterance from the user
# - <speaker2> to indicate the beginning and the tokens of an utterance from the bot
# - <pad> as a padding token to build batches of sequences
SPECIAL_TOKENS = ["<bos>", "<eos>", "<speaker1>", "<speaker2>", "<pad>"]

# We can add these special tokens to the vocabulary and the embeddings of the model:
tokenizer.set_special_tokens(SPECIAL_TOKENS)
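# The model's input embedding matrix has to grow by the same number of rows.
# In pytorch-pretrained-bert, which the set_special_tokens call above appears
# to target, this is done with set_num_special_tokens; `model` is assumed to
# be the matching GPT/GPT-2 model loaded earlier:
model.set_num_special_tokens(len(SPECIAL_TOKENS))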
import argparse
import torch
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

# Each process runs on 1 GPU device specified by the local_rank argument.
parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", type=int)
args = parser.parse_args()

# Initialize the distributed backend which will take care of synchronizing nodes/GPUs
torch.distributed.init_process_group(backend='nccl')
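# A minimal sketch of how the DistributedSampler and DataLoader imported above
# are typically wired up once the process group exists; `dataset` and
# `batch_size` are assumed to be defined elsewhere:
torch.cuda.set_device(args.local_rank)
sampler = DistributedSampler(dataset)
loader = DataLoader(dataset, sampler=sampler, batch_size=batch_size)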
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Optimizer

KD_loss = nn.KLDivLoss(reduction='batchmean')

def kd_step(teacher: nn.Module, student: nn.Module, temperature: float,
            inputs: torch.Tensor, optimizer: Optimizer):
    teacher.eval()
    student.train()
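    # A hedged sketch of how the distillation step could continue: soften both
    # sets of logits with the temperature, minimize the KL divergence between
    # them (KLDivLoss expects log-probabilities as input and probabilities as
    # target), then update the student. The forward-call signatures of
    # `teacher` and `student` are assumptions.
    with torch.no_grad():
        logits_t = teacher(inputs)
    logits_s = student(inputs)
    loss = KD_loss(input=F.log_softmax(logits_s / temperature, dim=-1),
                   target=F.softmax(logits_t / temperature, dim=-1))
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()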
def init_weights(self):
    """
    Here we reproduce the Keras default initialization to initialize the Embedding/LSTM weights
    """
    ih = (param.data for name, param in self.named_parameters() if 'weight_ih' in name)
    hh = (param.data for name, param in self.named_parameters() if 'weight_hh' in name)
    b = (param.data for name, param in self.named_parameters() if 'bias' in name)
    nn.init.uniform_(self.embed.weight.data, a=-0.5, b=0.5)
    for t in ih:
        nn.init.xavier_uniform_(t)
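    # Presumably the remaining generators are consumed the same way; the Keras
    # defaults this docstring refers to use an orthogonal recurrent kernel and
    # zero biases:
    for t in hh:
        nn.init.orthogonal_(t)
    for t in b:
        nn.init.constant_(t, 0)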
import torch
from torch.nn import Module, Parameter

class Attention(Module):
    """
    Computes a weighted average of channels across timesteps (1 parameter per channel).
    """
    def __init__(self, attention_size, return_attention=False):
        """ Initialize the attention layer

        # Arguments:
            attention_size: Size of the attention vector.
            return_attention: If True, output will include the weight for each input token
                              used for the prediction
        """
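        # A plausible constructor body, assuming the "1 parameter per channel"
        # in the class docstring is held in a single learnable vector; the
        # attribute names below are assumptions:
        super(Attention, self).__init__()
        self.attention_size = attention_size
        self.return_attention = return_attention
        self.attention_vector = Parameter(torch.FloatTensor(attention_size))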
import torch
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# input_seqs is a batch of input sequences as a numpy array of integers (word indices in vocabulary) padded with zeros
input_seqs = Variable(torch.from_numpy(input_seqs.astype('int64')).long())

# First: order the batch by decreasing sequence length
input_lengths = torch.LongTensor([torch.max(input_seqs[i, :].data.nonzero()) + 1 for i in range(input_seqs.size()[0])])
input_lengths, perm_idx = input_lengths.sort(0, descending=True)
input_seqs = input_seqs[perm_idx][:, :input_lengths.max()]

# Then pack the sequences
packed_input = pack_padded_sequence(input_seqs, input_lengths.cpu().numpy(), batch_first=True)
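# A sketch of the usual next steps, assuming an LSTM `rnn` defined elsewhere
# with batch_first=True: run the packed batch through it, unpack the output,
# and restore the original batch order with the inverse of perm_idx.
packed_output, (h, c) = rnn(packed_input)
output, _ = pad_packed_sequence(packed_output, batch_first=True)
_, unperm_idx = perm_idx.sort(0)
output = output[unperm_idx]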
from cymem.cymem cimport Pool
from random import random

cdef struct Rectangle:
    float w
    float h

cdef int check_rectangles(Rectangle* rectangles, int n_rectangles, float threshold):
    cdef int n_out = 0
    # C arrays contain no size information => we need to give it explicitly
    for rectangle in rectangles[:n_rectangles]:
        if rectangle.w * rectangle.h > threshold:
            n_out += 1
    return n_out
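# A sketch of a driver that exercises the struct and the cymem Pool imported
# above; the element count and threshold are illustrative. The Pool frees the
# allocation automatically when it is garbage-collected:
def main():
    cdef:
        int n_rectangles = 10000000
        float threshold = 0.25
        Pool mem = Pool()
        Rectangle* rectangles = <Rectangle*>mem.alloc(n_rectangles, sizeof(Rectangle))
    for i in range(n_rectangles):
        rectangles[i].w = random()
        rectangles[i].h = random()
    n_out = check_rectangles(rectangles, n_rectangles, threshold)
    print(n_out)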
import cProfile
import pstats
import my_slow_module

# Profile the module's entry point and dump the raw statistics to 'restats'
cProfile.run('my_slow_module.run()', 'restats')

# Load the stats and print the 30 most expensive calls by cumulative time
p = pstats.Stats('restats')
p.sort_stats('cumulative').print_stats(30)
predictions = model(inputs)               # Forward pass
loss = loss_function(predictions, labels) # Compute loss function
loss.backward()                           # Backward pass
optimizer.step()                          # Optimizer step
predictions = model(inputs)               # Forward pass with new parameters
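# In a real training loop the gradients accumulated by backward() must also be
# cleared between iterations; a minimal sketch, assuming a DataLoader `loader`:
for inputs, labels in loader:
    optimizer.zero_grad()                     # Clear gradients from the previous step
    predictions = model(inputs)               # Forward pass
    loss = loss_function(predictions, labels) # Compute loss function
    loss.backward()                           # Backward pass
    optimizer.step()                          # Optimizer step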