Skip to content

Instantly share code, notes, and snippets.

View DarshanDeshpande's full-sized avatar
:octocat:

DarshanDeshpande

:octocat:
View GitHub Profile
@DarshanDeshpande
DarshanDeshpande / ppo_modeling.py
Last active March 15, 2024 02:09
Training script for PPO modeling
import argparse
import os
import torch
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import LoraConfig
from trl import (
PPOTrainer,
PPOConfig,
@DarshanDeshpande
DarshanDeshpande / reward_modeling.py
Created March 14, 2024 06:24
Training script for reward modeling for RLHF
import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoModelForSequenceClassification, AutoTokenizer, BitsAndBytesConfig
from trl import RewardConfig, RewardTrainer
from peft import LoraConfig, get_peft_model
import wandb
import os
import argparse
@DarshanDeshpande
DarshanDeshpande / quantileregression.ipynb
Last active August 21, 2021 04:52
quantileregression
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import pytube
from bs4 import BeautifulSoup
import sqlite3
import html
import re
import os
from tqdm.notebook import tqdm
import requests
conn = sqlite3.connect('Database.db')