Skip to content

Instantly share code, notes, and snippets.

View darrenangle's full-sized avatar
🏋️

darren angle darrenangle

🏋️
View GitHub Profile
@darrenangle
darrenangle / choice_tree.py
Created June 3, 2024 13:44 — forked from wassname/choice_tree.py
For Hugging Face Transformers: sometimes you want to constrain the output to a JSON schema and record the probabilities of the choices/enums. I use it when rating or judging. It's much more efficient than sampling multiple times.
from jaxtyping import Float, Int
import torch
from torch.nn import functional as F
from torch import Tensor
from typing import List, Callable, Tuple, Dict, Optional
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer
def get_valid_next_choices(choices_tokens, current_tokens):
@darrenangle
darrenangle / grpo_demo.py
Created January 28, 2025 23:48 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
import re
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
# Load and prep dataset
SYSTEM_PROMPT = """