Ruotian(RT) Luo ruotianluo
Reinforcement Learning for Language Models

Yoav Goldberg, April 2023.

Why RL?

With the release of the ChatGPT model and follow-up large language models (LLMs), there was a lot of discussion of the importance of "RLHF training", that is, "reinforcement learning from human feedback". I was puzzled for a while as to why RL (Reinforcement Learning) is better than learning from demonstrations (a.k.a. supervised learning) for training language models. Shouldn't learning from demonstrations (or, in language-model terminology, "instruction fine-tuning": learning to imitate human-written answers) be sufficient? I came up with a theoretical argument that was somewhat convincing. But I came to realize there is an additional argument that not only supports the case for RL training but actually requires it, in particular for models like ChatGPT. This additional argument is spelled out in (the first half of) a talk by John Schulman from OpenAI. This post pretty much
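As a rough illustration of the contrast the post is about (not code from the post itself), here is a minimal sketch of a supervised imitation loss next to a REINFORCE-style update; model is assumed to return next-token logits, and sample_with_logprobs / reward_fn are hypothetical helpers standing in for decoding and a reward model.

import torch
import torch.nn.functional as F

# Instruction fine-tuning: imitate a human-written answer token by token.
def sft_loss(model, prompt_ids, answer_ids):
    logits = model(torch.cat([prompt_ids, answer_ids], dim=1))
    # predict each answer token from its prefix (teacher forcing)
    pred = logits[:, prompt_ids.size(1) - 1 : -1, :]
    return F.cross_entropy(pred.reshape(-1, pred.size(-1)), answer_ids.reshape(-1))

# RL from human feedback: sample an answer and reinforce it in proportion
# to a scalar reward (e.g. the score of a learned reward model).
def rl_loss(model, prompt_ids, reward_fn):
    sampled_ids, logprobs = sample_with_logprobs(model, prompt_ids)  # hypothetical helper
    reward = reward_fn(prompt_ids, sampled_ids)                      # hypothetical reward model
    return -(reward * logprobs.sum(dim=1)).mean()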

from torch.utils.data.distributed import DistributedSampler

class InfiniteConcatDistributedSampler(DistributedSampler):
    def __init__(self, *args, **kwargs):
        """
        Args:
            global_batch_size: since the infinite index stream wraps around,
                the same image could otherwise appear twice in one batch;
                we apply drop_last here in the sampler to avoid that.
            deterministic: we always start the generator with seed 0,
                and then to restart from a certain iteration, we just
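The preview cuts off mid-docstring; as a sketch of the mechanism it describes (an infinite index stream seeded with 0, resumed deterministically by skipping already-consumed indices), something along these lines is what is usually meant; the actual gist may differ in detail.

import torch

def infinite_indices(dataset_len, seed=0):
    # always start the generator from seed 0 so the order is reproducible
    g = torch.Generator()
    g.manual_seed(seed)
    while True:  # wrap around forever, reshuffling each pass
        yield from torch.randperm(dataset_len, generator=g).tolist()

def resume_indices(dataset_len, consumed, seed=0):
    # to restart from iteration `consumed`, regenerate the same stream
    # and skip the indices that were already used
    it = infinite_indices(dataset_len, seed)
    for _ in range(consumed):
        next(it)
    return it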
@ruotianluo
ruotianluo / find_ipad_slot.py
Created July 6, 2021 17:54
find_ipad_slot(pick up)
# Find an available in-store pickup slot for an iPad.
from bs4 import BeautifulSoup
import requests
import json

# Change to your own postal code.
postal_code = '60615'

def find_spot():
    headers = {
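The request body is cut off above; a typical way to run such a script is to poll periodically, assuming find_spot returns something truthy when a slot is available (that return value is an assumption, not shown in the preview).

import time

if __name__ == '__main__':
    # poll every five minutes until a pickup slot shows up near postal_code
    while True:
        if find_spot():
            print('Pickup slot found near', postal_code)
            break
        time.sleep(300)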
import torch
import torch.nn as nn

class involution(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.K = K = 3    # spatial kernel size
        self.C = C = 256  # number of channels
        self.r = r = 64   # channel reduction ratio for kernel generation
        self.G = G = 64   # number of groups sharing one generated kernel
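The preview stops after the hyper-parameters; for context, here is a minimal sketch of how an involution layer is typically assembled (a kernel-generation branch plus unfold, following the CVPR 2021 involution paper); the layer names and defaults below are illustrative, not necessarily the gist's.

import torch
import torch.nn as nn

class InvolutionSketch(nn.Module):
    def __init__(self, C=256, K=3, r=4, G=16):
        super().__init__()
        self.K, self.C, self.G = K, C, G
        # kernel generation: reduce channels, then expand to K*K weights per group
        self.reduce = nn.Sequential(nn.Conv2d(C, C // r, 1), nn.BatchNorm2d(C // r), nn.ReLU())
        self.span = nn.Conv2d(C // r, K * K * G, 1)
        self.unfold = nn.Unfold(K, padding=K // 2)

    def forward(self, x):
        B, C, H, W = x.shape
        K, G = self.K, self.G
        # per-pixel, per-group kernels generated from the input itself
        kernel = self.span(self.reduce(x)).view(B, G, 1, K * K, H, W)
        # gather each K x K neighborhood and weight it with the generated kernel
        patches = self.unfold(x).view(B, G, C // G, K * K, H, W)
        return (kernel * patches).sum(dim=3).view(B, C, H, W)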
import torch
import torch.nn as nn

class X(nn.Module):
    def __init__(self):
        super().__init__()
        self.a = nn.Linear(3, 4)

    def forward(self, x):
        # replace the input with the module's first parameter (the weight of self.a)
        x = next(self.parameters())
import torchvision
import os
import tempfile
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.optim as optim
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP
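The preview shows only the imports (the standard DistributedDataParallel tutorial boilerplate); continuing from them, a minimal sketch of process-group setup and a single DDP training step might look like this; the address, port, and toy model are illustrative.

def setup(rank, world_size):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    # each spawned process joins the default process group under its rank
    dist.init_process_group('gloo', rank=rank, world_size=world_size)

def demo_basic(rank, world_size):
    setup(rank, world_size)
    model = DDP(nn.Linear(10, 5))              # gradients are all-reduced across ranks
    optimizer = optim.SGD(model.parameters(), lr=0.001)
    optimizer.zero_grad()
    model(torch.randn(20, 10)).sum().backward()
    optimizer.step()
    dist.destroy_process_group()

if __name__ == '__main__':
    world_size = 2
    mp.spawn(demo_basic, args=(world_size,), nprocs=world_size, join=True)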
@ruotianluo
ruotianluo / train_cityscapes2.py
Created May 19, 2020 21:30
train_cityscapes2.py
import torch
import torch.multiprocessing as mp
import torch.distributed as dist
from torchvision import transforms
import random
from functools import partial
from easydict import EasyDict as edict
from albumentations import (
    Compose, HorizontalFlip, ShiftScaleRotate, PadIfNeeded, RandomCrop,
    RGBShift, RandomBrightness, RandomContrast,
)
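The preview ends inside the albumentations import; for reference, a minimal sketch of how these transforms are usually composed into a joint image/mask pipeline for Cityscapes-style training (crop size and probabilities are illustrative, and image/mask stand for numpy arrays).

train_aug = Compose([
    HorizontalFlip(p=0.5),
    ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=10, p=0.5),
    PadIfNeeded(min_height=769, min_width=769),
    RandomCrop(height=769, width=769),
    RGBShift(p=0.3),
    RandomBrightness(p=0.3),
    RandomContrast(p=0.3),
])

# albumentations applies the same spatial transforms to the image and its mask
augmented = train_aug(image=image, mask=mask)
image, mask = augmented['image'], augmented['mask']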
# This file contains the Transformer network.
# Most of the code is copied from http://nlp.seas.harvard.edu/2018/04/03/attention.html
# The cfg name correspondence:
# N=num_layers
# d_model=input_encoding_size
# d_ff=rnn_size
# h is always 8
from __future__ import absolute_import
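A minimal sketch of what that name mapping amounts to when building the model (make_model is the constructor from the annotated-transformer post linked above; the opt attribute names follow the comment, everything else is illustrative).

def build_transformer(opt, src_vocab, tgt_vocab):
    # translate this codebase's cfg names into the annotated-transformer arguments
    return make_model(
        src_vocab, tgt_vocab,
        N=opt.num_layers,                 # N = num_layers
        d_model=opt.input_encoding_size,  # d_model = input_encoding_size
        d_ff=opt.rnn_size,                # d_ff = rnn_size
        h=8,                              # h is always 8
    )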