Thomas Wolf (thomwolf)

thomwolf / gpt-2-wikitext-103.py
Last active April 16, 2024 19:27
A very small and self-contained gist to train a GPT-2 transformer model on wikitext-103
# Copyright (c) 2019-present, Thomas Wolf.
# All rights reserved. This source code is licensed under the MIT-style license.
""" A very small and self-contained gist to train a GPT-2 transformer model on wikitext-103 """
import os
from collections import namedtuple
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from ignite.engine import Engine, Events
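A minimal sketch, not the gist's full script, of how an ignite Engine typically drives a language-model training step; model, optimizer, device and dataloader are assumed to exist, and the model is assumed to return logits of shape (batch, seq_len, vocab).
def update(engine, batch):
    model.train()
    inputs = batch.to(device)
    logits = model(inputs)                                  # assumed: raw next-token logits
    shift_logits = logits[:, :-1].reshape(-1, logits.size(-1))
    shift_labels = inputs[:, 1:].reshape(-1)                # each position predicts the next token
    loss = nn.functional.cross_entropy(shift_logits, shift_labels)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()

trainer = Engine(update)                                    # ignite calls update() once per batch
trainer.run(dataloader, max_epochs=3)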
thomwolf / fast_speech_text_speech.py
Last active April 15, 2024 22:31
speech to text to speech
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
git clone https://github.com/myshell-ai/OpenVoice
cd OpenVoice
git clone https://huggingface.co/myshell-ai/OpenVoice
cp -r OpenVoice/* .
pip install whisper pynput pyaudio
"""
from openai import OpenAI
import time
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
git clone https://github.com/myshell-ai/OpenVoice
cd OpenVoice
git clone https://huggingface.co/myshell-ai/OpenVoice
cp -r OpenVoice/* .
pip install whisper pynput pyaudio
"""
from dataclasses import dataclass
from typing import Optional
import random
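A hedged sketch of the speech-to-text and text legs of such a pipeline, assuming an OpenAI-compatible local server (LM Studio or Ollama) is listening on localhost and whisper is installed; the OpenVoice text-to-speech leg is not shown, and the model name, port and file name are placeholders.
import whisper
from openai import OpenAI

stt = whisper.load_model("base")
text = stt.transcribe("recording.wav")["text"]              # speech -> text

client = OpenAI(base_url="http://localhost:1234/v1", api_key="not-needed")
reply = client.chat.completions.create(
    model="local-model",                                    # placeholder model name
    messages=[{"role": "user", "content": text}],
).choices[0].message.content
print(reply)                                                # text to hand to the TTS step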
thomwolf / loading_wikipedia.py
Last active January 18, 2024 14:04
Load full English Wikipedia dataset in HuggingFace nlp library
import os; import psutil; import timeit
from datasets import load_dataset
mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20
wiki = load_dataset("wikipedia", "20200501.en", split='train')
mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
print(f"RAM memory used: {(mem_after - mem_before)} MB")
s = """batch_size = 1000
for i in range(0, len(wiki), batch_size):
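A hedged guess at what the truncated timing snippet goes on to measure: slicing through the memory-mapped dataset in batches and timing one full pass.
start = timeit.default_timer()
batch_size = 1000
for i in range(0, len(wiki), batch_size):
    batch = wiki[i:i + batch_size]                          # slices are read from disk, not held in RAM
print(f"Iterated over the full dataset in {timeit.default_timer() - start:.1f} s")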
thomwolf / gradient_accumulation.py
Last active January 16, 2024 02:38
PyTorch gradient accumulation training loop
model.zero_grad()                                   # Reset gradients tensors
for i, (inputs, labels) in enumerate(training_set):
    predictions = model(inputs)                     # Forward pass
    loss = loss_function(predictions, labels)       # Compute loss function
    loss = loss / accumulation_steps                # Normalize our loss (if averaged)
    loss.backward()                                 # Backward pass
    if (i+1) % accumulation_steps == 0:             # Wait for several backward steps
        optimizer.step()                            # Now we can do an optimizer step
        model.zero_grad()                           # Reset gradients tensors
        if (i+1) % evaluation_steps == 0:           # Evaluate the model when we...
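The same pattern as a self-contained toy run; the linear model, random data and hyper-parameters below are illustrative stand-ins, not part of the gist.
import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loss_function = nn.MSELoss()
accumulation_steps = 4
training_set = [(torch.randn(8, 10), torch.randn(8, 1)) for _ in range(16)]

model.zero_grad()
for i, (inputs, labels) in enumerate(training_set):
    loss = loss_function(model(inputs), labels) / accumulation_steps
    loss.backward()                                 # gradients accumulate across mini-batches
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()                            # one update per 4 mini-batches
        model.zero_grad()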
thomwolf / top-k-top-p.py
Last active January 2, 2024 07:43
Sample the next token from a probability distribution using top-k and/or nucleus (top-p) sampling
def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """ Filter a distribution of logits using top-k and/or nucleus (top-p) filtering
        Args:
            logits: logits distribution shape (vocabulary size)
            top_k >0: keep only top k tokens with highest probability (top-k filtering).
            top_p >0.0: keep the top tokens with cumulative probability >= top_p (nucleus filtering).
                Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751)
    """
    assert logits.dim() == 1  # batch size 1 for now - could be updated for more but the code would be less clear
    top_k = min(top_k, logits.size(-1))  # Safety check
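A hedged usage sketch: filter the logits, renormalize, and sample; logits is assumed to be a 1-D tensor of vocabulary-sized scores, e.g. the last position of a model's output.
import torch
import torch.nn.functional as F

filtered = top_k_top_p_filtering(logits.clone(), top_k=50, top_p=0.9)
probs = F.softmax(filtered, dim=-1)                 # filtered entries get probability 0
next_token = torch.multinomial(probs, num_samples=1)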
thomwolf / AdamW.py
Created July 3, 2018 21:20
Implements Adam algorithm with weight decay fix in PyTorch (paper: https://arxiv.org/abs/1711.05101)
from torch.optim import Optimizer
class AdamW(Optimizer):
    """
    Implements Adam algorithm with weight decay fix in PyTorch
    Paper: Fixing Weight Decay Regularization in Adam by Ilya Loshchilov, Frank Hutter
    https://arxiv.org/abs/1711.05101
    """
    def __init__(self, params, lr, b1=0.9, b2=0.999, e=1e-8, l2=0,
                 vector_l2=False, max_grad_norm=-1, **kwargs):
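A hedged usage sketch based only on the signature shown above (model, training_set and loss_function are assumed names); note that PyTorch has since shipped the same decoupled weight decay as torch.optim.AdamW.
optimizer = AdamW(model.parameters(), lr=6.25e-5, l2=0.01, max_grad_norm=1.0)
for inputs, labels in training_set:
    loss = loss_function(model(inputs), labels)
    loss.backward()
    optimizer.step()                                # Adam update with decoupled weight decay
    optimizer.zero_grad()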
thomwolf / parallel.py
Last active August 8, 2023 15:50
Data Parallelism in PyTorch for modules and losses
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang, Rutgers University, Email: zhang.hang@rutgers.edu
## Modified by Thomas Wolf, HuggingFace Inc., Email: thomas@huggingface.co
## Copyright (c) 2017-2018
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
"""Encoding Data Parallel"""
thomwolf / MetaLearner.py
Last active June 29, 2023 10:06
A simple bare MetaLearner class in PyTorch
class MetaLearner(nn.Module):
    """ Bare Meta-learner class
        Should be added: initialization, hidden states, more control over everything
    """
    def __init__(self, model):
        super(MetaLearner, self).__init__()
        self.weights = Parameter(torch.Tensor(1, 2))

    def forward(self, forward_model, backward_model):
        """ Forward optimizer with a simple linear neural net
thomwolf / persona-chat.py
Last active February 28, 2023 07:25
Download and load persona-chat json dataset
import json
from pytorch_pretrained_bert import cached_path
url = "https://s3.amazonaws.com/datasets.huggingface.co/personachat/personachat_self_original.json"
# Download and load JSON dataset
personachat_file = cached_path(url)
with open(personachat_file, "r", encoding="utf-8") as f:
    dataset = json.loads(f.read())
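A hedged look at the loaded structure, assuming the usual PersonaChat layout of "train" / "valid" splits where each dialog carries a "personality" list and "utterances" with "history" and "candidates".
print(dataset.keys())                               # expected: dict_keys(['train', 'valid'])
dialog = dataset["train"][0]
print(dialog["personality"])                        # persona sentences for the speaker
print(dialog["utterances"][0]["history"])           # dialogue history at the first turn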