Public gists by Lysandre Debut (LysandreJik)
@LysandreJik
LysandreJik / broken-tokenizer.ipynb
Created February 25, 2021 20:36
Broken Tokenizer
(Notebook preview unavailable.)
# Setup for converting a TF ELECTRA pretraining checkpoint to the transformers
# PyTorch implementation. Note: ElectraGenerator and ElectraDiscriminator come
# from a pre-release transformers branch; the released library exposes
# ElectraForMaskedLM and ElectraForPreTraining instead.
import collections
from configure_pretraining import PretrainingConfig
from run_pretraining import PretrainingModel
from pretrain.pretrain_data import get_input_fn, Inputs
import tensorflow as tf
import torch
from model import modeling
from transformers.modeling_electra import ElectraModel, ElectraGenerator, ElectraDiscriminator, load_tf_weights_in_electra
from transformers import BertConfig
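For reference, a minimal conversion sketch against the released transformers API, which exposes ElectraConfig, ElectraForPreTraining, and load_tf_weights_in_electra; all file paths here are hypothetical placeholders, not the original gist's:

from transformers import ElectraConfig, ElectraForPreTraining
from transformers.models.electra.modeling_electra import load_tf_weights_in_electra

config = ElectraConfig.from_json_file("electra_config.json")     # hypothetical path
model = ElectraForPreTraining(config)                            # the discriminator
load_tf_weights_in_electra(model, config, "electra/model.ckpt")  # hypothetical checkpoint
model.save_pretrained("electra-pytorch")                         # hypothetical output dir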
@LysandreJik
LysandreJik / training_gpt2_lmhead_model.py
Created December 16, 2019 22:34
Training GPT-2 LM Head model in Keras
from transformers import GPT2Tokenizer, TFGPT2LMHeadModel
import tensorflow as tf
model = TFGPT2LMHeadModel.from_pretrained("distilgpt2")
tokenizer = GPT2Tokenizer.from_pretrained("distilgpt2")
text = """
A SQUAT grey building of only thirty-four stories. Over the main entrance the
words, CENTRAL LONDON HATCHERY AND CONDITIONING CENTRE,
and, in a shield, the World State’s motto, COMMUNITY, IDENTITY, STABILITY.
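The preview ends inside the excerpt; a hedged sketch of one way the fine-tuning could proceed, using an explicit GradientTape loop (window size and learning rate are illustrative assumptions, not the original gist's values):

import numpy as np

# Slice the tokenized text into fixed-size next-token-prediction windows.
ids = tokenizer.encode(text)
block = 32
windows = [ids[i : i + block + 1] for i in range(0, len(ids) - block, block)]
inputs = np.array([w[:-1] for w in windows])
labels = np.array([w[1:] for w in windows])

optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

for i in range(len(inputs)):
    with tf.GradientTape() as tape:
        logits = model(inputs[i : i + 1])[0]  # (1, block, vocab_size)
        loss = loss_fn(labels[i : i + 1], logits)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))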
@LysandreJik
LysandreJik / save_hub_checkpoint.py
Created November 8, 2019 16:19
Save a HUB model to a checkpoint
import tensorflow as tf
import tensorflow_hub as hub
model_size = 'xlarge'.upper()
version = 2
model = hub.Module("https://tfhub.dev/google/albert_{}/{}".format(model_size.lower(), version), trainable=False)
init = tf.global_variables_initializer()
saver = tf.train.Saver()
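The preview cuts off before the actual save; a minimal completion under the same TF1 API (the output path is a hypothetical placeholder):

with tf.Session() as sess:
    sess.run(init)  # restores the HUB module's pretrained variables
    saver.save(sess, "albert_{}_v{}/model.ckpt".format(model_size.lower(), version))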
@LysandreJik
LysandreJik / compare_albert.py
Created November 8, 2019 16:18
Comparing ALBERT TF1 and HUB
import tensorflow_hub as hub
import tensorflow as tf
import modeling
import os
import numpy as np
import tokenization
# Model size and paths
model_size = 'large'.upper()
version = 2
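The preview stops at the setup; a hedged sketch of the comparison it implies, feeding identical inputs to the HUB module and to google-research's TF1 AlbertModel (the config path, input values, and tolerance are assumptions):

input_ids = tf.constant([[31, 51, 99, 0]])
input_mask = tf.constant([[1, 1, 1, 0]])
segment_ids = tf.constant([[0, 0, 0, 0]])

hub_module = hub.Module("https://tfhub.dev/google/albert_{}/{}".format(model_size.lower(), version), trainable=False)
hub_output = hub_module(
    inputs=dict(input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids),
    signature="tokens", as_dict=True)["sequence_output"]

config = modeling.AlbertConfig.from_json_file("albert_config.json")  # hypothetical path
tf1_model = modeling.AlbertModel(config=config, is_training=False, input_ids=input_ids,
                                 input_mask=input_mask, token_type_ids=segment_ids)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # In practice the TF1 graph would be restored from the checkpoint written by
    # save_hub_checkpoint.py above before comparing.
    hub_vals, tf1_vals = sess.run([hub_output, tf1_model.get_sequence_output()])
    print(np.allclose(hub_vals, tf1_vals, atol=1e-4))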
@LysandreJik
LysandreJik / cuda_10_install.sh
Created October 24, 2019 13:37
Uninstall cuda 10.1 and install cuda 10.0 instead on Ubuntu 18.04
sudo apt remove cuda
# The download link serves the .run installer; name it explicitly so the next step matches.
wget https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux -O cuda_10.0.130_410.48_linux.run
sudo sh cuda_10.0.130_410.48_linux.run
# Make the new toolkit visible to the shell and the dynamic linker.
export PATH=$PATH:/usr/local/cuda/bin
export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
nvcc --version  # should now report release 10.0
{
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12
}
import tensorflow as tf
import tensorflow_datasets
from transformers import (BertTokenizer, TFBertForSequenceClassification,
                          DistilBertTokenizer, TFDistilBertForSequenceClassification,
                          glue_convert_examples_to_features)

# Either DistilBERT...
model = TFDistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
# ...or BERT:
model = TFBertForSequenceClassification.from_pretrained("bert-base-cased")
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")

data = tensorflow_datasets.load("glue/mrpc")
train_dataset = glue_convert_examples_to_features(data["train"], tokenizer, 128, "mrpc")
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")
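A hedged continuation in the style of the era's run_tf_glue example (batching and step counts are assumptions):

model.compile(optimizer=optimizer, loss=loss, metrics=[metric])
train_dataset = train_dataset.shuffle(100).batch(32).repeat(2)
model.fit(train_dataset, epochs=2, steps_per_epoch=115)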
@LysandreJik
LysandreJik / benchmark.py
Last active October 16, 2019 19:18
Benchmarking transformers
############################################################
# EDITED AFTER FEEDBACK REGARDING THE TENSORFLOW INFERENCE #
############################################################