Shuchang Zhou zsc

## gist:103bf2c9b467e36befde060801c30b57
#!/bin/bash

# Names of the tmux session, window, and pane
TMUX_SESSION="2"
TMUX_WINDOW="mon"
TMUX_PANE="0"  # Index of the pane you want to monitor (usually 0)

# Define the command you want to send
COMMAND="just say hi and nothing else"

## gist:58a3bd742dcedd545a39559a48b47cc7
import json
import re
import time
import unittest
from collections import deque

# Characters that are plausible candidates for insertion/replacement in a broken JSON string.
PLAUSIBLE_CHARS = '"{}[],: '

def _apply_simple_fixes(json_str: str) -> tuple[str, list[str]]:

## gist:9269031abf3cab01c5030dcafaff7e69
python + single-file html 生成一个prompt tune Qwen2.5-1.5B-Instruct （注意不是 Qwen2-1.5B-Instruct）模型的 demo。测试用 gsm-8k的 test split 取 200 题。
* UI 训练 tab
网页可起停训练（支持从 checkpoint continue train）并单独 div 显示 log（不要浮窗）。不要有弹窗。
使用 tensorboard 保存 train loss, train/test accuracy 等。
实验名包含所有重要超参并带时间戳防覆盖。
* UI 测试 tab
可以选择模型 checkpoint 测试。

训练在 4090 bfloat16 进行。 Prompt tuning 直接实现不用 PEFT 库

## gist:93e7c6acb39f30ec12bf362a92fb843e

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                zsc
                / gist:93e7c6acb39f30ec12bf362a92fb843e
            
            
              Last active
              May 14, 2025 02:58
            
          
The play unfolds in an academic conference room, named "The Conference of Learning Innovators." Five experts in the field of Reinforcement Learning (RL) sit around a polished mahogany table. Their names are Professor Ada Sutton, a renowned expert in deep reinforcement learning; Dr. Leo Kael, specializing in policy gradient methods; Dr. Nova Lane, an expert in safe and robust RL algorithms; Dr. Max Vold, a specialist in value-based methods; and Dr. Iris Nash, known for her work in algorithmic optimization in RL.
Professor Ada Sutton, leading the discussion, begins:
Ada: Ladies and gentlemen, today we convene to illuminate the intricacies of the Proximal Policy Optimization (PPO) algorithm. Dr. Kael, since you specialize in policy gradient methods, would you care to begin by establishing what problem PPO aims to solve within the RL domain?
Leo: Ada, an insightful starting point. At the heart of Reinforcement Learning, we have agents learning to make decisions—formulated in terms of policies—to ma

  
## prepare_data_for_gpt3_fine_tune.py
import random
import sys

# We are going to fine-tune OpenAI Davinci on a given text file, so this
# script generates "prompt, completion" pairs for training (as a CSV file).

fname = sys.argv[1]

def create_csv_for_gpt3(fname):
    with open(fname) as f:

## gpt3.5_prompt
def gpt_3_5_prompt(func: str) -> List[dict]:
    return [
        {
            'role': 'system',
            'content': f'''
You are an intelligent programmer. You must complete the python function given to you by the user. And you must follow the format they present when giving your answer!

You can only respond with comments and actual code, no free-flowing text (unless in a comment).'''
        },
        {

## clean_sqlite3.sh
# Rewrite mydatabase.sql in place and then load it into the MySQL database
# "askbot". Presumably mydatabase.sql is a SQLite dump being adapted for
# MySQL -- confirm against how the file is produced.

# Strip the "PRAGMA foreign_keys=OFF;", "BEGIN TRANSACTION;" and "COMMIT;"
# statements, and replace every double quote with a backtick.
sed -i -e 's/PRAGMA foreign_keys=OFF;//g' -e 's/BEGIN TRANSACTION;//g' -e 's/COMMIT;//g' -e 's/\"/`/g' mydatabase.sql
# Rename the AUTOINCREMENT keyword to AUTO_INCREMENT.
sed -i 's/AUTOINCREMENT/AUTO_INCREMENT/g' mydatabase.sql
# Remove every "DEFERRABLE INITIALLY DEFERRED" clause.
sed -i 's/DEFERRABLE INITIALLY DEFERRED//g' mydatabase.sql
# Connect as askbotuser (-p prompts for the password); the init command drops
# and recreates the askbot database and turns foreign key checks off, then the
# edited SQL file is fed to the client on stdin.
# WARNING: this destroys any existing contents of the askbot database.
# NOTE(review): dropping the default database may leave the session with no
# database selected; confirm the import still lands in askbot (a "USE askbot;"
# in the init command may be needed).
mysql -u askbotuser -p --init-command="DROP DATABASE askbot;CREATE DATABASE askbot;SET foreign_key_checks = 0;" askbot < mydatabase.sql

## answer_bard.py
import os
import json
from Bard import Chatbot
from tqdm import tqdm

if __name__ == "__main__":
    lst = json.load(open(f'../py/Downloads/english_questions.json'))
    lst = lst[:]
    out = []
    for ln in tqdm(lst):

## gist:8d808711b895ebee5a9f10f170c3ff2e
import transformers
import torch
import torch.nn as nn
import torch.nn.functional as F

from bitsandbytes.functional import quantize_blockwise, dequantize_blockwise
from typing import Tuple
from torch.cuda.amp import custom_fwd, custom_bwd

from transformers import BloomTokenizerFast
	#!/bin/bash

	# Names of the tmux session, window, and pane
	TMUX_SESSION="2"
	TMUX_WINDOW="mon"
	TMUX_PANE="0" # Index of the pane you want to monitor (usually 0)

	# Define the command you want to send
	COMMAND="just say hi and nothing else"
	import json
	import re
	import time
	import unittest
	from collections import deque

	# Characters that are plausible candidates for insertion/replacement in a broken JSON string.
	PLAUSIBLE_CHARS = '"{}[],: '

	def _apply_simple_fixes(json_str: str) -> tuple[str, list[str]]:
	python + single-file html 生成一个prompt tune Qwen2.5-1.5B-Instruct （注意不是 Qwen2-1.5B-Instruct）模型的 demo。测试用 gsm-8k的 test split 取 200 题。
	* UI 训练 tab
	网页可起停训练（支持从 checkpoint continue train）并单独 div 显示 log（不要浮窗）。不要有弹窗。
	使用 tensorboard 保存 train loss, train/test accuracy 等。
	实验名包含所有重要超参并带时间戳防覆盖。
	* UI 测试 tab
	可以选择模型 checkpoint 测试。

	训练在 4090 bfloat16 进行。 Prompt tuning 直接实现不用 PEFT 库
	import random
	import sys

	# We are going to fine-tune OpenAI Davinci on a given text file, so this
	# script generates "prompt, completion" pairs for training (as a CSV file).

	fname = sys.argv[1]

	def create_csv_for_gpt3(fname):
	with open(fname) as f:
	def gpt_3_5_prompt(func: str) -> List[dict]:
	return [
	{
	'role': 'system',
	'content': f'''
	You are an intelligent programmer. You must complete the python function given to you by the user. And you must follow the format they present when giving your answer!

	You can only respond with comments and actual code, no free-flowing text (unless in a comment).'''
	},
	{
	# Strip the "PRAGMA foreign_keys=OFF;", "BEGIN TRANSACTION;" and "COMMIT;"
	# statements from mydatabase.sql in place, and replace every double quote
	# with a backtick.
	sed -i -e 's/PRAGMA foreign_keys=OFF;//g' -e 's/BEGIN TRANSACTION;//g' -e 's/COMMIT;//g' -e 's/\"/`/g' mydatabase.sql
	# Rename the AUTOINCREMENT keyword to AUTO_INCREMENT.
	sed -i 's/AUTOINCREMENT/AUTO_INCREMENT/g' mydatabase.sql
	# Remove every "DEFERRABLE INITIALLY DEFERRED" clause.
	sed -i 's/DEFERRABLE INITIALLY DEFERRED//g' mydatabase.sql
	# Connect as askbotuser (-p prompts for the password); the init command
	# drops and recreates the askbot database and turns foreign key checks off,
	# then the edited SQL file is fed to the client on stdin.
	# WARNING: this destroys any existing contents of the askbot database.
	# NOTE(review): dropping the default database may leave the session with no
	# database selected; confirm the import still lands in askbot.
	mysql -u askbotuser -p --init-command="DROP DATABASE askbot;CREATE DATABASE askbot;SET foreign_key_checks = 0;" askbot < mydatabase.sql
	import os
	import json
	from Bard import Chatbot
	from tqdm import tqdm

	if __name__ == "__main__":
	lst = json.load(open(f'../py/Downloads/english_questions.json'))
	lst = lst[:]
	out = []
	for ln in tqdm(lst):
	import transformers
	import torch
	import torch.nn as nn
	import torch.nn.functional as F

	from bitsandbytes.functional import quantize_blockwise, dequantize_blockwise
	from typing import Tuple
	from torch.cuda.amp import custom_fwd, custom_bwd

	from transformers import BloomTokenizerFast