Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python3
"""
Complete training pipeline for Qwen3-4B with Super Weight Preservation
Supports HuggingFace datasets with automatic chat template conversion
"""
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import json
@data2json
data2json / delta.py
Created May 2, 2025 16:50
Parameter Deltas enables zero-cost transfer of post-training capabilities to new base models
import torch
from transformers import AutoModelForCausalLM
# Load models
llama3_base = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")
llama3_inst = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
llama31_base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B")
# Calculate Δθ = θpost - θbase
delta_params = {}
@data2json
data2json / GSM-OK.py
Created February 12, 2025 04:25
VLLM GSM-8K Test Dataset Evaluation
import argparse
from typing import List, Dict, Any, Optional
from vllm import EngineArgs, LLMEngine, RequestOutput, SamplingParams
from datasets import load_dataset
from tqdm import tqdm
import json
import re
from dataclasses import dataclass
from vllm.utils import FlexibleArgumentParser
@data2json
data2json / GRPO-Function_Calling_Qwen2.5-0.5B-Instruct.py
Created February 1, 2025 22:04
GRPO-Function_Calling_Qwen2.5-0.5B-Instruct
import re
import torch
import numpy as np
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
class SimpleCalculator:
"""Simple calculator tool implementation"""
@data2json
data2json / grpo_demo.py
Created February 1, 2025 18:19 — forked from willccbb/grpo_demo.py
GRPO Llama-1B
# train_grpo.py
import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
# Load and prep dataset
import asyncio
import os
import json
from openai import AsyncOpenAI
from datetime import datetime
import random
# Set up the client with custom base URL
client = AsyncOpenAI(base_url="http://0.0.0.0:8000/v1/")
essobi@thegreenbox:~/LLaMA-Factory$ more OKG.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load model and tokenizer
checkpoint = "meta-llama/Meta-Llama-3.1-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
checkpoint, torch_dtype=torch.bfloat16, device_map="auto"
)
curl -X POST http://192.168.50.146:8000/v1/chat/completions \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{
"messages": [
{
"content": "You are a helpful assistant that can check the weather. Use the get_weather tool when asked about weather conditions. If you choose to call a function ONLY reply in the following format: <{sta
rt_tag}={function_name}>{parameters}{end_tag} where start_tag => <function parameters => a JSON dict with the function argument name as key and function argument value as value. end_tag => </function>",
"role": "system"
curl -s -X POST 'http://0.0.0.0:8000/v1/chat/completions' -H "Content-Type: application/json" -d '{
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "Environment: ipython\nTools: brave_search, wolfram_alpha\n\nCutting Knowledge Date: December 2023\nToday Date: 23 Jul 2024\n\nYou are a helpful Assistant."
},
{
"role": "user",
"content": "Can you help me solve this equation: x^3 - 4x^2 + 6x - 24 = 0"
@data2json
data2json / t.py
Last active January 12, 2025 14:00
T - The missing LLM Unix Token Tool
#!/usr/bin/env python
# t - The missing LLM token counting and splitting tool for UNIX
import argparse
import sys
from typing import Optional, List
import math
import os
import tiktoken