Thomas Wolf (thomwolf)
thomwolf / loading_wikipedia.py
Last active Dec 28, 2020
Load the full English Wikipedia dataset with the HuggingFace datasets (formerly nlp) library
import os
import psutil
import timeit
from datasets import load_dataset
mem_before = psutil.Process(os.getpid()).memory_info().rss >> 20
wiki = load_dataset("wikipedia", "20200501.en", split='train')
mem_after = psutil.Process(os.getpid()).memory_info().rss >> 20
print(f"RAM memory used: {(mem_after - mem_before)} MB")
s = """batch_size = 1000
for i in range(0, len(wiki), batch_size):
thomwolf / knowledge_distilation.py
Last active Apr 9, 2021
Knowledge Distillation
import torch
import torch.nn as nn
from torch.optim import Optimizer
KD_loss = nn.KLDivLoss(reduction='batchmean')
def kd_step(teacher: nn.Module, student: nn.Module, temperature: float,
            inputs: torch.Tensor, optimizer: Optimizer):
    teacher.eval()
    student.train()
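    # --- continuation (assumed; the gist preview truncates above): a standard
    # softened-softmax KD step. Requires `import torch.nn.functional as F`
    # alongside the imports at the top; the models' call signature is assumed.
    with torch.no_grad():
        logits_t = teacher(inputs)   # teacher provides the soft targets; no gradients needed
    logits_s = student(inputs)
    # KLDivLoss expects log-probabilities as input and probabilities as target;
    # both distributions are softened by the temperature
    loss = KD_loss(F.log_softmax(logits_s / temperature, dim=-1),
                   F.softmax(logits_t / temperature, dim=-1))
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()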
thomwolf / comparing-hidden-states.py
Created Aug 9, 2019
Compare the hidden-states of the TensorFlow and PyTorch models
import numpy as np

# Get the TensorFlow and PyTorch hidden-states as NumPy arrays.
# sess.run needs a fetch as its first argument, not just the feed;
# `tf_hidden_states` stands for whatever tensor the TF graph exposes (name assumed).
tensorflow_hidden_states = sess.run(tf_hidden_states, feed_dict=feed_dict)
pytorch_hidden_states = pytorch_model(inputs)
pytorch_hidden_states = pytorch_hidden_states.cpu().detach().numpy()
# Compute the maximum absolute difference between hidden-states.
# Should be less than 1e-3. Typically around 1e-5/1e-6.
max_absolute_diff = np.amax(np.abs(tensorflow_hidden_states - pytorch_hidden_states))
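# A sanity check along the lines of the comment above (added illustration):
assert max_absolute_diff < 1e-3, f"Implementations diverge: {max_absolute_diff:.2e}"
print(f"Max absolute difference: {max_absolute_diff:.2e}")  # typically ~1e-5/1e-6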
thomwolf / loading-weights-gpt-2.py
Last active Aug 10, 2019
Loading TensorFlow weights into a PyTorch model
import os
import re
import numpy as np
import tensorflow as tf
model = MyPyTorchGPT2()  # instantiate the PyTorch model skeleton we defined (weights not yet loaded)
# Retrieve weights from TF checkpoint
tf_path = os.path.abspath(gpt2_checkpoint_path)
init_vars = tf.train.list_variables(tf_path)
tf_vars = []
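# --- continuation (assumed; the preview ends on the empty list above):
# read each checkpoint variable into a NumPy array ---
for name, shape in init_vars:
    print(f"Loading TF weight {name} with shape {shape}")
    array = tf.train.load_variable(tf_path, name)
    tf_vars.append((name, array.squeeze()))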
thomwolf / gpt-2-main-forward.py
Created Aug 9, 2019
Main forward pass for GPT-2
def forward(self, input_ids):
    position_ids = torch.arange(0, input_ids.size(-1), dtype=torch.long, device=input_ids.device)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    hidden_states = self.wte(input_ids) + self.wpe(position_ids)  # token + position embeddings
    hidden_states = self.drop(hidden_states)
    for block in self.h:
        hidden_states = block(hidden_states)
    hidden_states = self.ln_f(hidden_states)
    return hidden_states  # preview ends here; returning the final hidden states is assumed
thomwolf / gpt-2-block-pytorch.py
Created Aug 8, 2019
GPT-2 PyTorch block module
class Block(nn.Module):
    def __init__(self, n_ctx, config, scale=False):
        super(Block, self).__init__()
        nx = config.n_embd
        self.ln_1 = LayerNorm(nx, eps=config.layer_norm_epsilon)
        self.attn = Attention(nx, n_ctx, config, scale)
        self.ln_2 = LayerNorm(nx, eps=config.layer_norm_epsilon)
        self.mlp = MLP(4 * nx, config)

    def forward(self, x):
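        # --- continuation (assumed; the preview cuts off here): mirrors the
        # TensorFlow block in gpt-2-block.py below (pre-norm + residuals) ---
        a = self.attn(self.ln_1(x))   # attention over the layer-normed input
        x = x + a                     # residual connection
        m = self.mlp(self.ln_2(x))    # MLP over the layer-normed sum
        x = x + m                     # second residual connection
        return x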
thomwolf / gpt-2-block.py
Created Aug 8, 2019
GPT-2 TensorFlow block class
def block(x, scope, *, past, hparams):
    with tf.variable_scope(scope):
        nx = x.shape[-1].value
        a, present = attn(norm(x, 'ln_1'), 'attn', nx, past=past, hparams=hparams)
        x = x + a
        m = mlp(norm(x, 'ln_2'), 'mlp', nx*4, hparams=hparams)
        x = x + m
        return x, present
thomwolf / gpt-2-main-class.py
Last active Aug 9, 2019
GPT-2 main model class
class GPT2Model(nn.Module):
    def __init__(self, config):
        super(GPT2Model, self).__init__()  # nn.Module.__init__ takes no config argument
        self.wte = nn.Embedding(config.vocab_size, config.n_embd)
        self.wpe = nn.Embedding(config.n_positions, config.n_embd)
        self.drop = nn.Dropout(config.embd_pdrop)
        self.h = nn.ModuleList([Block(config.n_ctx, config, scale=True) for _ in range(config.n_layer)])
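        # --- continuation (assumed): the forward pass in gpt-2-main-forward.py
        # applies a final self.ln_f, so the constructor presumably also defines ---
        self.ln_f = LayerNorm(config.n_embd, eps=config.layer_norm_epsilon)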
thomwolf / read_checkpoint.py
Created Aug 8, 2019
Read a TensorFlow checkpoint
import os
from pprint import pprint
import tensorflow as tf
tf_path = os.path.abspath('./models/117M/model.ckpt') # Path to our TensorFlow checkpoint
tf_vars = tf.train.list_variables(tf_path)
pprint(tf_vars)
thomwolf / get_gpt_2.sh
Created Aug 8, 2019
Retrieve OpenAI GPT-2 model
git clone https://github.com/openai/gpt-2.git
cd gpt-2
python download_model.py 117M
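Note: download_model.py saves the checkpoint under models/117M/, which matches the ./models/117M/model.ckpt path that read_checkpoint.py above inspects.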