Thomas Wolf (thomwolf)
thomwolf / loading_wikipedia.py
Last active Dec 28, 2020
Load the full English Wikipedia dataset with the HuggingFace datasets (formerly nlp) library
import os
import psutil
import timeit
from datasets import load_dataset
mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20
wiki = load_dataset("wikipedia", "20200501.en", split='train')
mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
print(f"RAM memory used: {(mem_after - mem_before)} MB")
s = """batch_size = 1000
for i in range(0, len(wiki), batch_size):
thomwolf / knowledge_distilation.py
Last active Apr 9, 2021
Knowledge Distillation
import torch
import torch.nn as nn
from torch.optim import Optimizer
KD_loss = nn.KLDivLoss(reduction='batchmean')
def kd_step(teacher: nn.Module, student: nn.Module, temperature: float,
            inputs: torch.Tensor, optimizer: Optimizer):
    teacher.eval()
    student.train()
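    # --- continuation (assumed; the gist preview truncates above): a standard
    # softened-softmax KD step. Requires `import torch.nn.functional as F`
    # alongside the imports at the top; the models' call signature is assumed.
    with torch.no_grad():
        logits_t = teacher(inputs)   # teacher provides the soft targets; no gradients needed
    logits_s = student(inputs)
    # KLDivLoss expects log-probabilities as input and probabilities as target;
    # both distributions are softened by the temperature
    loss = KD_loss(F.log_softmax(logits_s / temperature, dim=-1),
                   F.softmax(logits_t / temperature, dim=-1))
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()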
thomwolf / comparing-hidden-states.py
Created Aug 9, 2019
Compare the hidden-states of the TensorFlow and PyTorch models
import numpy as np

# Get the TensorFlow and PyTorch hidden-states as NumPy arrays.
# sess.run needs a fetch as its first argument, not just the feed;
# `tf_hidden_states` stands for whatever tensor the TF graph exposes (name assumed).
tensorflow_hidden_states = sess.run(tf_hidden_states, feed_dict=feed_dict)
pytorch_hidden_states = pytorch_model(inputs)
pytorch_hidden_states = pytorch_hidden_states.cpu().detach().numpy()
# Compute the maximum absolute difference between hidden-states.
# Should be less than 1e-3. Typically around 1e-5/1e-6.
max_absolute_diff = np.amax(np.abs(tensorflow_hidden_states - pytorch_hidden_states))
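# A sanity check along the lines of the comment above (added illustration):
assert max_absolute_diff < 1e-3, f"Implementations diverge: {max_absolute_diff:.2e}"
print(f"Max absolute difference: {max_absolute_diff:.2e}")  # typically ~1e-5/1e-6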
thomwolf / loading-weights-gpt-2.py
Last active Aug 10, 2019
Loading TensorFlow weights into a PyTorch model
import os
import re
import numpy as np
import tensorflow as tf
model = MyPyTorchGPT2()  # instantiate the PyTorch model skeleton we defined (weights not yet loaded)
# Retrieve weights from TF checkpoint
tf_path = os.path.abspath(gpt2_checkpoint_path)
init_vars = tf.train.list_variables(tf_path)
tf_vars = []
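# --- continuation (assumed; the preview ends on the empty list above):
# read each checkpoint variable into a NumPy array ---
for name, shape in init_vars:
    print(f"Loading TF weight {name} with shape {shape}")
    array = tf.train.load_variable(tf_path, name)
    tf_vars.append((name, array.squeeze()))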
thomwolf / gpt-2-main-forward.py
Created Aug 9, 2019
Main forward pass for GPT-2
def forward(self, input_ids):
    position_ids = torch.arange(0, input_ids.size(-1), dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    hidden_states = self.wte(input_ids) + self.wpe(position_ids)  # token + position embeddings
    hidden_states = self.drop(hidden_states)
    for block in self.h:
        hidden_states = block(hidden_states)
    hidden_states = self.ln_f(hidden_states)
    return hidden_states  # preview ends here; returning the final hidden states is assumed
thomwolf / gpt-2-block-pytorch.py
Created Aug 8, 2019
GPT-2 PyTorch block module
class Block(nn.Module):
    def __init__(self, n_ctx, config, scale=False):
        super(Block, self).__init__()
        nx = config.n_embd
        self.ln_1 = LayerNorm(nx, eps=config.layer_norm_epsilon)
        self.attn = Attention(nx, n_ctx, config, scale)
        self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)
        self.mlp = MLP(4 * nx, config)

    def forward(self, x):
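        # --- continuation (assumed; the preview cuts off here): mirrors the
        # TensorFlow block in gpt-2-block.py below (pre-norm + residuals) ---
        a = self.attn(self.ln_1(x))   # attention over the layer-normed input
        x = x + a                     # residual connection
        m = self.mlp(self.ln_2(x))    # MLP over the layer-normed sum
        x = x + m                     # second residual connection
        return x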
thomwolf / gpt-2-block.py
Created Aug 8, 2019
GPT-2 TensorFlow block class
def block(x, scope, *, past, hparams):
    with tf.variable_scope(scope):
        nx = x.shape[-1].value
        a, present = attn(norm(x, 'ln_1'), 'attn', nx, past=past, hparams=hparams)
        x = x + a
        m = mlp(norm(x, 'ln_2'), 'mlp', nx*4, hparams=hparams)
        x = x + m
        return x, present
thomwolf / gpt-2-main-class.py
Last active Aug 9, 2019
GPT-2 main model class
class GPT2Model(nn.Module):
    def __init__(self, config):
        super(GPT2Model, self).__init__()  # nn.Module.__init__ takes no config argument
        self.wte = nn.Embedding(config.vocab_size, config.n_embd)
        self.wpe = nn.Embedding(config.n_positions, config.n_embd)
        self.drop = nn.Dropout(config.embd_pdrop)
        self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
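        # --- continuation (assumed): the forward pass in gpt-2-main-forward.py
        # applies a final self.ln_f, so the constructor presumably also defines ---
        self.ln_f = LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)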
thomwolf / read_checkpoint.py
Created Aug 8, 2019
Read a TensorFlow checkpoint
import os
from pprint import pprint
import tensorflow as tf
tf_path = os.path.abspath('./models/117M/model.ckpt') # Path to our TensorFlow checkpoint
tf_vars = tf.train.list_variables(tf_path)
pprint(tf_vars)
thomwolf / get_gpt_2.sh
Created Aug 8, 2019
Retrieve OpenAI GPT-2 model
git clone https://github.com/openai/gpt-2.git
cd gpt-2
python download_model.py 117M
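Note: download_model.py saves the checkpoint under models/117M/, which matches the ./models/117M/model.ckpt path that read_checkpoint.py above inspects.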