Chillee / lora_example.py
Last active May 14, 2023 09:45
import torch
import torch.nn as nn
import torch.nn.utils.parametrize as parametrize
from torch.utils._pytree import tree_map

class LoraTensor(object):
    def __init__(self, weights, A, B):
        # Wrap the frozen base weight together with its low-rank LoRA
        # factors; the effective weight is weights + B @ A, which never
        # needs to be materialized.
        self.weights = weights
        self.A = A
        self.B = B
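
The snippet above only stores the pieces; the gist goes on to dispatch tensor ops through tree_map so the low-rank update is applied lazily. As a compact point of comparison, here is a sketch of the same weights + B @ A idea expressed with torch.nn.utils.parametrize instead of a tensor subclass (LoRAParametrization, the rank, and the layer sizes are illustrative, not from the gist):

import torch
import torch.nn as nn
import torch.nn.utils.parametrize as parametrize

class LoRAParametrization(nn.Module):
    def __init__(self, fan_out, fan_in, rank=4):
        super().__init__()
        # B starts at zero, so training begins from the unmodified weight
        self.A = nn.Parameter(torch.randn(rank, fan_in) * 0.01)
        self.B = nn.Parameter(torch.zeros(fan_out, rank))

    def forward(self, W):
        return W + self.B @ self.A

layer = nn.Linear(16, 32)
layer.weight.requires_grad_(False)  # freeze the base weight; only A and B train
parametrize.register_parametrization(layer, "weight", LoRAParametrization(32, 16))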

Reinforcement Learning for Language Models

Yoav Goldberg, April 2023.

Why RL?

With the release of the ChatGPT model and follow-up large language models (LLMs), there was a lot of discussion of the importance of "RLHF training", that is, "reinforcement learning from human feedback". I was puzzled for a while as to why RL (Reinforcement Learning) is better than learning from demonstrations (a.k.a. supervised learning) for training language models. Shouldn't learning from demonstrations (or, in language model terminology, "instruction fine-tuning": learning to imitate human-written answers) be sufficient? I came up with a theoretical argument that was somewhat convincing. But I came to realize there is an additional argument which not only supports the case for RL training, but also requires it, in particular for models like ChatGPT. This additional argument is spelled out in (the first half of) a talk by John Schulman from OpenAI. This post pretty much …

Mason-McGough / pointer_network.py
Last active October 2, 2023 09:55
Pointer network attention architecture in PyTorch
class PointerNetwork(nn.Module):
    """
    From "Pointer Networks" by Vinyals et al. (2015)

    Adapted from pointer-networks-pytorch by ast0414:
    https://github.com/ast0414/pointer-networks-pytorch

    Args:
        n_hidden: The number of features to expect in the inputs.
    """
huchenxucs / pos_embed.py
Created July 23, 2020 06:09
T5 relative positional embedding
import math
import torch
import torch.nn as nn
from torch.nn import functional as F

class RelativePositionBias(nn.Module):
    def __init__(self, bidirectional=True, num_buckets=32, max_distance=128, n_heads=2):
        super(RelativePositionBias, self).__init__()
        self.bidirectional = bidirectional
        self.num_buckets = num_buckets
        self.max_distance = max_distance
        self.n_heads = n_heads
        # one learned scalar bias per (bucket, head) pair, added to the attention logits
        self.relative_attention_bias = nn.Embedding(num_buckets, n_heads)
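
The gist continues past this preview with the part that makes T5's scheme distinctive: mapping a signed relative distance to one of num_buckets bucket ids, exact for nearby positions and logarithmically coarser far away. A sketch of that standard bucketing function, following the original mesh-tensorflow implementation (reusing the imports above):

def relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
    # Half the buckets store exact small offsets; the rest cover larger
    # offsets at logarithmically increasing granularity.
    ret = 0
    n = -relative_position
    if bidirectional:
        num_buckets //= 2
        ret += (n < 0).to(torch.long) * num_buckets  # separate bucket range for "future" offsets
        n = torch.abs(n)
    else:
        n = torch.max(n, torch.zeros_like(n))
    max_exact = num_buckets // 2
    is_small = n < max_exact
    val_if_large = max_exact + (
        torch.log(n.float() / max_exact) / math.log(max_distance / max_exact)
        * (num_buckets - max_exact)
    ).to(torch.long)
    val_if_large = torch.min(val_if_large, torch.full_like(val_if_large, num_buckets - 1))
    return ret + torch.where(is_small, n, val_if_large)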
HarshTrivedi / pad_packed_demo.py
Last active June 8, 2024 06:54 — forked from Tushar-N/pad_packed_demo.py
Minimal tutorial on packing (pack_padded_sequence) and unpacking (pad_packed_sequence) sequences in PyTorch.
import torch
from torch import LongTensor
from torch.nn import Embedding, LSTM
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# (torch.autograd.Variable is deprecated since PyTorch 0.4; plain tensors suffice.)

## We want to run an LSTM on a batch of 3 character sequences ['long_str', 'tiny', 'medium']
#
# Step 1: Construct Vocabulary
# Step 2: Load indexed data (list of instances, where each instance is a list of character indices)
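
The preview ends at the step list. Here is a condensed sketch of the steps the full tutorial walks through (the step grouping, embedding size, and hidden size are illustrative):

seqs = ['long_str', 'tiny', 'medium']
vocab = ['<pad>'] + sorted({ch for s in seqs for ch in s})                     # Step 1
vectorized = [torch.LongTensor([vocab.index(ch) for ch in s]) for s in seqs]   # Step 2

# Pad to the longest sequence and sort by length, descending (required for packing)
lengths = torch.LongTensor([len(v) for v in vectorized])
padded = torch.nn.utils.rnn.pad_sequence(vectorized, batch_first=True)
lengths, order = lengths.sort(descending=True)
padded = padded[order]

# Embed, pack, run the LSTM, unpack
embed = Embedding(len(vocab), 4)
lstm = LSTM(input_size=4, hidden_size=5, batch_first=True)
packed = pack_padded_sequence(embed(padded), lengths, batch_first=True)
packed_out, _ = lstm(packed)
out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
# out: (3, 8, 5); positions past each sequence's true length are zeros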
# PyTorch code for implementing the mixture of softmaxes layer from
# "Breaking the Softmax Bottleneck: A High-Rank RNN Language Model"
# https://arxiv.org/abs/1711.03953
context = self.fc(out)
# Non-log version (see the log-space sketch below for a numerically stabler variant)
priors = F.softmax(context[:, -self.n_components:], dim=1)
mixtures = torch.stack(
    [priors[:, i].unsqueeze(1)
     * F.softmax(context[:, i * self.nClasses : (i + 1) * self.nClasses], dim=1)
     for i in range(self.n_components)],
    dim=1,
)
out = torch.log(mixtures.sum(1))
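
Multiplying probabilities and only then taking the log, as above, can underflow. A self-contained log-space sketch of the same computation (n_components and n_classes stand in for the snippet's attributes; the context layout matches the slicing above):

import torch
import torch.nn.functional as F

def mixture_of_softmaxes(context, n_components, n_classes):
    # context: (batch, n_components * n_classes + n_components)
    log_priors = F.log_softmax(context[:, -n_components:], dim=1)              # (B, K)
    comp_logits = context[:, :-n_components].view(-1, n_components, n_classes)
    log_comps = F.log_softmax(comp_logits, dim=2)                              # (B, K, C)
    # log sum_k pi_k * softmax_k(context) == logsumexp over components
    return torch.logsumexp(log_priors.unsqueeze(2) + log_comps, dim=1)         # (B, C)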
karpathy / min-char-rnn.py
Last active June 7, 2024 14:41
Minimal character-level language model with a Vanilla Recurrent Neural Network, in Python/numpy
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np
# data I/O
data = open('input.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
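
The preview cuts off after the data I/O block. Continuing from the variables above, here is a sketch of a single forward step of the vanilla RNN the rest of the gist implements (hidden_size and the 0.01 init scale are the usual hyperparameters; parameter names follow the gist's convention):

hidden_size = 100
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01   # input -> hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden -> hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01   # hidden -> output
bh, by = np.zeros((hidden_size, 1)), np.zeros((vocab_size, 1))

x = np.zeros((vocab_size, 1)); x[0] = 1       # one-hot encoding of a character
h = np.zeros((hidden_size, 1))                # previous hidden state
h = np.tanh(Wxh @ x + Whh @ h + bh)           # recurrence
y = Why @ h + by                              # unnormalized next-char scores
p = np.exp(y) / np.sum(np.exp(y))             # softmax over the vocabulary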