lewtun
# pip install emoji
import argparse
from datasets import load_dataset
import emoji
def remove_emoji(text: str) -> str:
    return emoji.replace_emoji(text, replace='').strip()


def format_messages(x):
    emojis_found = False
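The preview cuts off inside format_messages. A minimal sketch of how the cleanup could be applied to a chat-style dataset follows; the "messages" column, the dataset id, and the per-turn logic are assumptions for illustration, not taken from the gist.

# pip install emoji datasets
from datasets import load_dataset
import emoji


def remove_emoji(text: str) -> str:
    return emoji.replace_emoji(text, replace='').strip()


def format_messages(x):
    # Strip emojis from every turn of a chat-style example (assumed "messages" column).
    cleaned = [{**msg, "content": remove_emoji(msg["content"])} for msg in x["messages"]]
    return {"messages": cleaned}


ds = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft")  # assumed dataset
ds = ds.map(format_messages, num_proc=4)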
@lewtun
lewtun / chat_template.jinja
Last active July 8, 2025 16:01
SmolLM3 chat template
{# ───── defaults ───── #}
{%- if enable_thinking is not defined -%}
  {%- set enable_thinking = true -%}
{%- endif -%}
{# ───── reasoning mode ───── #}
{%- if enable_thinking -%}
  {%- set reasoning_mode = "/think" -%}
{%- else -%}
  {%- set reasoning_mode = "/no_think" -%}
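A hedged sketch of toggling the template's enable_thinking flag from Python; extra keyword arguments to apply_chat_template are forwarded to the Jinja template, and the checkpoint name below is an assumption.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM3-3B")  # assumed checkpoint
messages = [{"role": "user", "content": "What is the capital of France?"}]

# Reasoning mode on (also the default when enable_thinking is not defined).
prompt_think = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=True
)

# Reasoning mode off: the template switches to "/no_think".
prompt_no_think = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
)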
@lewtun
lewtun / grpo_benchmark_v0.py
Created March 3, 2025 14:04
GRPO benchmarking
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, set_seed
import time
import torch
set_seed(0)
device = "cuda"
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B",
    attn_implementation="flash_attention_2",
    torch_dtype="bfloat16",
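The preview stops mid-call. A minimal sketch of a generation-throughput benchmark along these lines; the prompt and generation settings are assumptions, not the gist's actual configuration.

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, set_seed
import time
import torch

set_seed(0)
device = "cuda"

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B")
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-1.5B",
    attn_implementation="flash_attention_2",
    torch_dtype="bfloat16",
).to(device)

generation_config = GenerationConfig(max_new_tokens=256, do_sample=True, temperature=0.7)
inputs = tokenizer("Write a short story about a robot.", return_tensors="pt").to(device)

torch.cuda.synchronize()
start = time.perf_counter()
outputs = model.generate(**inputs, generation_config=generation_config)
torch.cuda.synchronize()
elapsed = time.perf_counter() - start

new_tokens = outputs.shape[-1] - inputs["input_ids"].shape[-1]
print(f"{new_tokens} new tokens in {elapsed:.2f}s ({new_tokens / elapsed:.1f} tok/s)")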
@lewtun
lewtun / grpo_vllm.py
Last active January 29, 2025 10:51
GRPO with vLLM demo
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer
import random
"""Usage (on 8 x H100s):
pip install vllm==0.7.0 --extra-index-url https://download.pytorch.org/whl/cu121
pip install -e '.[dev]'
# DDP
accelerate launch --config_file examples/accelerate_configs/multi_gpu.yaml --num_processes 7 scratch/grpo_demo.py
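A hedged sketch of the GRPOTrainer wiring such a demo might use; the dataset id and the toy length reward mirror the TRL quickstart rather than the gist itself, and the hyperparameters are assumptions. With use_vllm=True, generation runs on a dedicated GPU, which is presumably why the launch command above trains with 7 of the 8 processes.

from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

dataset = load_dataset("trl-lib/tldr", split="train")  # assumed dataset


def reward_len(completions, **kwargs):
    # Toy reward: prefer completions close to 20 characters.
    return [-abs(20 - len(completion)) for completion in completions]


training_args = GRPOConfig(
    output_dir="Qwen2.5-1.5B-GRPO",
    use_vllm=True,  # generate completions with vLLM on the spare GPU
    bf16=True,
)
trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-1.5B-Instruct",
    reward_funcs=reward_len,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()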
@lewtun
lewtun / extract_boxed.py
Last active January 9, 2025 12:19
Simple parser to extract the \boxed{answer} parts from LLM completions
from typing import Optional
def extract_boxed_solution(text: str) -> Optional[str]:
    """
    Extracts the content of the last `\boxed{}` in a given LaTeX-style text.

    Args:
        text (str): The input string containing LaTeX-style content.

    Returns:
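The docstring is truncated in the preview. A hedged reconstruction of the parser using simple brace matching; the gist's exact implementation may differ.

from typing import Optional


def extract_boxed_solution(text: str) -> Optional[str]:
    r"""Extract the content of the last `\boxed{...}` in a LaTeX-style string, or None."""
    marker = r"\boxed{"
    start = text.rfind(marker)
    if start == -1:
        return None
    i = start + len(marker)
    depth = 1
    content = []
    while i < len(text) and depth > 0:
        char = text[i]
        if char == "{":
            depth += 1
        elif char == "}":
            depth -= 1
        if depth > 0:
            content.append(char)
        i += 1
    # If the braces never balanced, the expression was malformed.
    return "".join(content).strip() if depth == 0 else None


print(extract_boxed_solution(r"The answer is \boxed{\frac{1}{2}}."))  # \frac{1}{2}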
@lewtun
lewtun / sft_llama.py
Last active November 9, 2024 02:33
SFT Llama 3.1 8B - full training vs LoRA vs QLoRA
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
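Only the license header survives in this preview. A hedged sketch of how the three regimes typically differ in setup; the checkpoint, dataset, and LoRA hyperparameters below are assumptions, and in practice only one of the three variants would be active.

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTConfig, SFTTrainer

model_id = "meta-llama/Llama-3.1-8B"  # assumed checkpoint
dataset = load_dataset("HuggingFaceH4/ultrachat_200k", split="train_sft")  # assumed dataset

# (1) Full training: load in bf16 and pass peft_config=None so every weight is updated.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
peft_config = None

# (2) LoRA: keep the bf16 base model frozen and train low-rank adapters.
peft_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05, task_type="CAUSAL_LM", target_modules="all-linear"
)

# (3) QLoRA: same adapters, but the frozen base model is quantized to 4-bit NF4.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
    ),
)

trainer = SFTTrainer(
    model=model,
    args=SFTConfig(output_dir="llama-3.1-8b-sft", bf16=True),
    train_dataset=dataset,
    peft_config=peft_config,  # None -> full fine-tuning
)
trainer.train()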
@lewtun
lewtun / dpo_winrate.py
Created September 24, 2024 20:45
DPO with WinRateCallback
# flake8: noqa
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
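As with the previous gist, only the license header is visible. A hedged sketch of how TRL's WinRateCallback can be attached to a DPO run; the policy model, dataset, and judge are assumptions picked for illustration.

from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer, HfPairwiseJudge, WinRateCallback

model_id = "Qwen/Qwen2-0.5B-Instruct"  # assumed policy model
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = load_dataset("trl-lib/ultrafeedback_binarized")  # assumed preference dataset

trainer = DPOTrainer(
    model=model,
    args=DPOConfig(output_dir="qwen2-0.5b-dpo", eval_strategy="steps", eval_steps=100),
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    processing_class=tokenizer,
)

# The callback periodically generates completions for the eval prompts and asks a
# pairwise judge how often the policy beats the reference model (the win rate).
trainer.add_callback(WinRateCallback(judge=HfPairwiseJudge(), trainer=trainer))
trainer.train()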
@lewtun
lewtun / dbrx-instruct-ifeval.json
Created April 24, 2024 21:48
DBRX-Instruct IFEval scores from LightEval
{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 4,
"max_samples": null,
"job_id": "",
"start_time": 1163608.425196265,
"end_time": 1173616.769654949,
"total_evaluation_time_secondes": "10008.34445868386",
@lewtun
lewtun / view_details.py
Last active February 28, 2024 11:28
View LightEval predictions
"""
First install: pip install datasets pandas rich transformers
Usage:
# Loglikelihood evals
python view_details.py --filepath path/to/parquet/details
# Generative evals
python view_details.py --filepath path/to/parquet/details --is_generative
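A hedged sketch of what a minimal version of this viewer could look like, assuming the details files are plain parquet; no column names are assumed beyond what pandas reports.

import argparse

import pandas as pd
from rich import print as rprint


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--filepath", type=str, required=True)
    # In the full script this switch would select which columns to render for generative evals.
    parser.add_argument("--is_generative", action="store_true")
    args = parser.parse_args()

    # LightEval stores per-sample details as parquet; load and show a few rows.
    df = pd.read_parquet(args.filepath)
    rprint(f"Loaded {len(df)} predictions with columns: {list(df.columns)}")
    for _, row in df.head(3).iterrows():
        rprint(row.to_dict())


if __name__ == "__main__":
    main()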
@lewtun
lewtun / sft_trainer.py
Last active April 21, 2025 16:04
Fine-tuning Mistral 7B with TRL & DeepSpeed ZeRO-3
# This is a modified version of TRL's `SFTTrainer` example (https://github.com/huggingface/trl/blob/main/examples/scripts/sft_trainer.py),
# adapted to run with DeepSpeed ZeRO-3 and Mistral-7B-V1.0. The settings below were run on 1 node of 8 x A100 (80GB) GPUs.
#
# Usage:
# - Install the latest transformers & accelerate versions: `pip install -U transformers accelerate`
# - Install deepspeed: `pip install deepspeed==0.9.5`
# - Install TRL from main: `pip install git+https://github.com/huggingface/trl.git`
# - Clone the repo: `git clone https://github.com/huggingface/trl.git`
# - Copy this Gist into trl/examples/scripts
# - Run from root of trl repo with: accelerate launch --config_file=examples/accelerate_configs/deepspeed_zero3.yaml --gradient_accumulation_steps 8 examples/scripts/sft_trainer.py
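The script body is not shown in the preview. A hedged sketch of the trainer wiring that the launch command above would drive; DeepSpeed ZeRO-3 itself is configured entirely in deepspeed_zero3.yaml, so nothing DeepSpeed-specific appears in the Python code, and the dataset and hyperparameters below are assumptions.

from datasets import load_dataset
from trl import SFTConfig, SFTTrainer

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")  # assumed dataset

training_args = SFTConfig(
    output_dir="mistral-7b-sft",
    per_device_train_batch_size=4,      # assumed
    gradient_accumulation_steps=8,      # matches the launch command above
    gradient_checkpointing=True,
    bf16=True,
)
trainer = SFTTrainer(
    model="mistralai/Mistral-7B-v0.1",
    args=training_args,
    train_dataset=dataset,
)
trainer.train()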