Skip to content

Instantly share code, notes, and snippets.

View eustlb's full-sized avatar

eustlb

  • Hugging Face
  • Paris, France
View GitHub Profile
@eustlb
eustlb / draft_mapping.py
Created August 3, 2025 19:20
key mapping for parakeet integration
STATE_DICT_MAPPING = {
# Subsampling layer
r"encoder\.pre_encode\.": r"encoder.subsampling.",
# Subsampling specific mappings
r"encoder\.subsampling\.conv\.": r"encoder.subsampling.layers.",
r"encoder\.subsampling\.out\.": r"encoder.subsampling.linear.",
# # Positional encoding (skip pe buffer)
# r"encoder\.pos_enc\.pe$": None, # Skip buffer
r"encoder\.pos_enc\.": r"encoder.encode_positions.",
# Conformer layers - attention (NeMo already uses self_attn)
@eustlb
eustlb / reproducer_test_1b_model_integration.py
Last active August 1, 2025 12:38
reproducer for Parakeet Transformers integration tests
# To install NeMo, run:
# uv pip install git+https://github.com/NVIDIA/NeMo.git@b97e42b3dd1c9bcdf37c81c63220744af474c9c0
from nemo.collections.asr.models import ASRModel
import torch
import os
from datasets import load_dataset
import soundfile as sf
TMP_DIR = "./tmp"
@eustlb
eustlb / reproducer_test_1b_model_integration_batched.py
Last active August 1, 2025 12:38
reproducer for Parakeet Transformers integration tests
# To install NeMo, run:
# uv pip install git+https://github.com/NVIDIA/NeMo.git@b97e42b3dd1c9bcdf37c81c63220744af474c9c0
from nemo.collections.asr.models import ASRModel
import torch
import os
from datasets import load_dataset
import soundfile as sf
TMP_DIR = "./tmp"
@eustlb
eustlb / reproduce_weight_norms_diffs.py
Created July 28, 2025 10:02
reproducer to test weight norm differences
import torch
import torch.nn as nn
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import remove_parametrizations
# Define dtypes to test
dtypes_to_test = [torch.float64, torch.float32, torch.float16]
for dtype in dtypes_to_test:
print(f"\nTesting with dtype: {dtype}")
@eustlb
eustlb / benchmark_weight_norm.py
Created July 28, 2025 09:44
benchmark weight norm
import torch
import torch.nn as nn
from torch.nn.utils.parametrizations import weight_norm
from torch.nn.utils.parametrize import remove_parametrizations
# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
# 1. Create conv layer and move to device
@eustlb
eustlb / reproduce_integration_tests_voxtral.py
Created July 17, 2025 10:53
reproduce Voxtral Transformers integration tests
from mistral_common.protocol.instruct.messages import TextChunk, AudioChunk, UserMessage, AssistantMessage, RawAudio
from mistral_common.audio import Audio
from huggingface_hub import hf_hub_download
from openai import OpenAI
# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://0.0.0.0:8000/v1"
client = OpenAI(
api_key=openai_api_key,
@eustlb
eustlb / infer_voxtral_librispeech.py
Created July 15, 2025 17:05
WER evals for Voxtral
from datasets import load_dataset, Audio
from transformers import VoxtralForConditionalGeneration, VoxtralProcessor
import os
import torch
from whisper.normalizers import EnglishTextNormalizer
import jiwer
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
torch_device = "cuda" if torch.cuda.is_available() else "cpu" # "cpu"
@eustlb
eustlb / infer_kyutai_stt_librispeech.py
Last active June 26, 2025 09:45
Run Kyutai STT inference on LibriSpeech test-clean for verification
from datasets import load_dataset, Audio
from transformers import KyutaiSpeechToTextProcessor, KyutaiSpeechToTextForConditionalGeneration
import os
import torch
from whisper.normalizers import EnglishTextNormalizer
import jiwer
os.environ["CUDA_VISIBLE_DEVICES"] = "3"
torch_device = "cuda" if torch.cuda.is_available() else "cpu" # "cpu"
@eustlb
eustlb / reproducer_kyutai_speech_to_text_generate.py
Created June 21, 2025 07:04
reproducer for Kyutai STT Transformers integration, test `test_generation`
# ------ install moshi ------
# git clone https://github.com/kyutai-labs/moshi.git
# cd moshi && git checkout 0395bd6c9a95e899c397a68c75f300f3b5409b2c
# uv pip install -e .
# ----------------------------
import torch
from moshi import run_inference
args = {
@eustlb
eustlb / reproducer_kyutai_speech_to_text_generate_batched.py
Created June 21, 2025 07:03
reproducer for Kyutai STT Transformers integration, test `test_generation_batched`
# ------ install moshi ------
# git clone https://github.com/kyutai-labs/moshi.git
# cd moshi && git checkout 0395bd6c9a95e899c397a68c75f300f3b5409b2c
# uv pip install -e .
# ----------------------------
import torch
from moshi import run_inference
args = {