@karpathy
karpathy / min-char-rnn.py
Last active July 22, 2024 04:44
Minimal character-level language model with a Vanilla Recurrent Neural Network, in Python/numpy
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np
# data I/O
data = open('input.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
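The gist continues by building lookup tables between characters and integer indices and initializing the RNN parameters; a minimal sketch of that continuation (the hyperparameter values here are illustrative defaults, not necessarily the gist's exact settings):

print('data has %d characters, %d unique.' % (data_size, vocab_size))
# lookup tables between characters and integer indices
char_to_ix = { ch: i for i, ch in enumerate(chars) }
ix_to_char = { i: ch for i, ch in enumerate(chars) }

# hyperparameters (illustrative values)
hidden_size = 100   # size of hidden layer of neurons
seq_length = 25     # number of steps to unroll the RNN for
learning_rate = 1e-1

# model parameters: one-hot input of size vocab_size -> hidden state -> output logits
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01   # input to hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01   # hidden to output
bh = np.zeros((hidden_size, 1))  # hidden bias
by = np.zeros((vocab_size, 1))   # output bias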
# PyTorch code for implementing the mixture of softmaxes layer from
# "Breaking the Softmax Bottleneck: A High-Rank RNN Language Model"
# https://arxiv.org/abs/1711.03953
# context holds n_components * nClasses component logits plus n_components prior logits
context = self.fc(out)
# Non-log version
# mixture weights (priors) come from the last n_components columns
priors = F.softmax(context[:, -self.n_components:], dim=-1)
# each component is its prior times a softmax over its own slice of nClasses logits
mixtures = torch.stack(
    [priors[:, i].unsqueeze(1) * F.softmax(context[:, i * self.nClasses:(i + 1) * self.nClasses], dim=-1)
     for i in range(self.n_components)], 1)
# sum the weighted components, then take the log to get log-probabilities
out = torch.log(mixtures.sum(1))
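Since the fragment above assumes surrounding class attributes (self.fc, self.n_components, self.nClasses), here is a minimal self-contained sketch of a mixture-of-softmaxes output head; the module name and its constructor arguments are illustrative, not from the gist:

import torch
import torch.nn as nn
import torch.nn.functional as F

class MixtureOfSoftmaxes(nn.Module):
    """Output head mixing n_components softmax distributions over n_classes."""
    def __init__(self, d_hidden, n_classes, n_components):
        super().__init__()
        self.n_classes = n_classes
        self.n_components = n_components
        # one linear map produces all component logits plus the prior logits
        self.fc = nn.Linear(d_hidden, n_components * n_classes + n_components)

    def forward(self, h):
        context = self.fc(h)                                          # (batch, K*C + K)
        priors = F.softmax(context[:, -self.n_components:], dim=-1)   # (batch, K)
        comps = context[:, :-self.n_components]
        comps = comps.view(-1, self.n_components, self.n_classes)     # (batch, K, C)
        probs = (priors.unsqueeze(-1) * F.softmax(comps, dim=-1)).sum(dim=1)
        return torch.log(probs)                                       # (batch, C) log-probs

The returned log-probabilities can be fed directly to nn.NLLLoss, which is the usual way this head is trained.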
@HarshTrivedi
HarshTrivedi / pad_packed_demo.py
Last active July 5, 2024 22:47 — forked from Tushar-N/pad_packed_demo.py
Minimal tutorial on packing (pack_padded_sequence) and unpacking (pad_packed_sequence) sequences in pytorch.
import torch
from torch import LongTensor
from torch.nn import Embedding, LSTM
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
## We want to run LSTM on a batch of 3 character sequences ['long_str', 'tiny', 'medium']
#
# Step 1: Construct Vocabulary
# Step 2: Load indexed data (list of instances, where each instance is list of character indices)
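The gist walks through those steps one by one; a condensed sketch of the core pack/unpack round trip on the same three sequences (the padding index, embedding size, and hidden size here are assumptions for illustration, not the gist's exact choices):

seqs = ['long_str', 'tiny', 'medium']
# Step 1: vocabulary, with index 0 reserved for padding
vocab = ['<pad>'] + sorted(set(''.join(seqs)))
# Step 2: each instance becomes a list of character indices
vectorized = [[vocab.index(ch) for ch in s] for s in seqs]

# pad to the max length and record the true lengths
lengths = LongTensor([len(v) for v in vectorized])
padded = torch.zeros(len(vectorized), lengths.max(), dtype=torch.long)
for i, v in enumerate(vectorized):
    padded[i, :len(v)] = LongTensor(v)

embed = Embedding(len(vocab), 4)  # small embedding dim for illustration
lstm = LSTM(input_size=4, hidden_size=5, batch_first=True)

# pack_padded_sequence expects length-sorted batches unless enforce_sorted=False
packed = pack_padded_sequence(embed(padded), lengths, batch_first=True, enforce_sorted=False)
packed_out, (h, c) = lstm(packed)
# pad_packed_sequence restores a regular (batch, max_len, hidden) tensor plus lengths
output, out_lengths = pad_packed_sequence(packed_out, batch_first=True)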
@huchenxucs
huchenxucs / pos_embed.py
Created July 23, 2020 06:09
T5 relative positional embedding
import math
import torch
import torch.nn as nn
from torch.nn import functional as F
class RelativePositionBias(nn.Module):
    def __init__(self, bidirectional=True, num_buckets=32, max_distance=128, n_heads=2):
        super(RelativePositionBias, self).__init__()
        self.bidirectional = bidirectional
        self.num_buckets = num_buckets
        self.max_distance = max_distance
        self.n_heads = n_heads
        # one learned scalar bias per (bucket, head) pair
        self.relative_attention_bias = nn.Embedding(num_buckets, n_heads)
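The heart of the T5 scheme is mapping each signed relative distance to one of num_buckets buckets (exact buckets for small distances, logarithmically sized buckets beyond that) and looking up a per-head bias. Below is a sketch of that bucketing and the bias computation following the T5 paper and the Hugging Face T5 implementation, rather than a verbatim copy of the gist:

    @staticmethod
    def _relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
        # map each signed relative position to a bucket id in [0, num_buckets)
        ret = 0
        n = -relative_position
        if bidirectional:
            num_buckets //= 2
            ret += (n < 0).to(torch.long) * num_buckets  # separate buckets for "future" positions
            n = torch.abs(n)
        else:
            n = torch.max(n, torch.zeros_like(n))
        # half of the buckets cover small distances exactly
        max_exact = num_buckets // 2
        is_small = n < max_exact
        # the other half cover larger distances in logarithmically growing ranges
        val_if_large = max_exact + (
            torch.log(n.float() / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
        ).to(torch.long)
        val_if_large = torch.min(val_if_large, torch.full_like(val_if_large, num_buckets - 1))
        ret += torch.where(is_small, n, val_if_large)
        return ret

    def forward(self, qlen, klen):
        # returns a bias of shape (1, n_heads, qlen, klen) to be added to attention logits
        context_position = torch.arange(qlen, dtype=torch.long)[:, None]
        memory_position = torch.arange(klen, dtype=torch.long)[None, :]
        relative_position = memory_position - context_position  # (qlen, klen)
        rp_bucket = self._relative_position_bucket(
            relative_position, bidirectional=self.bidirectional,
            num_buckets=self.num_buckets, max_distance=self.max_distance)
        values = self.relative_attention_bias(rp_bucket)         # (qlen, klen, n_heads)
        return values.permute(2, 0, 1).unsqueeze(0)              # (1, n_heads, qlen, klen)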
@Mason-McGough
Mason-McGough / pointer_network.py
Last active October 2, 2023 09:55
Pointer network attention architecture in PyTorch
import torch
import torch.nn as nn

class PointerNetwork(nn.Module):
    """
    From "Pointer Networks" by Vinyals et al. (2015)
    Adapted from pointer-networks-pytorch by ast0414:
    https://github.com/ast0414/pointer-networks-pytorch
    Args:
        n_hidden: The number of features to expect in the inputs.
    """

Reinforcement Learning for Language Models

Yoav Goldberg, April 2023.

Why RL?

With the release of the ChatGPT model and follow-up large language models (LLMs), there was a lot of discussion of the importance of "RLHF training", that is, "reinforcement learning from human feedback". I was puzzled for a while as to why RL (Reinforcement Learning) is better than learning from demonstrations (a.k.a. supervised learning) for training language models. Shouldn't learning from demonstrations (or, in language-model terminology, "instruction fine-tuning", learning to imitate human-written answers) be sufficient? I came up with a theoretical argument that was somewhat convincing. But I came to realize there is an additional argument which not only supports the case for RL training, but also requires it, in particular for models like ChatGPT. This additional argument is spelled out in (the first half of) a talk by John Schulman from OpenAI. This post pretty much

@Chillee
Chillee / lora_example.py
Last active May 14, 2023 09:45
lora_example.py
import torch
import torch.nn as nn
import torch.nn.utils.parametrize as parametrize
from torch.utils._pytree import tree_map

class LoraTensor(object):
    """Tensor-like wrapper holding a base weight matrix plus the two low-rank LoRA factors."""
    def __init__(self, weights, A, B):
        self.weights = weights  # base (frozen) weight matrix
        self.A = A              # low-rank factor
        self.B = B              # low-rank factor; A and B together parametrize the update to weights
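The fragment stops here. As a point of comparison, the same LoRA idea can be expressed with torch.nn.utils.parametrize (already imported above); this is a generic sketch of a W + B @ A parametrization, not a reconstruction of the gist's tensor-subclass approach, and the names, rank, and layer sizes are illustrative:

class LoRAParametrization(nn.Module):
    """Reparametrizes a weight matrix as W + (alpha / r) * B @ A, with A and B of rank r."""
    def __init__(self, d_out, d_in, rank=4, alpha=1.0):
        super().__init__()
        self.A = nn.Parameter(torch.randn(rank, d_in) * 0.01)  # trainable low-rank factor
        self.B = nn.Parameter(torch.zeros(d_out, rank))        # starts at zero: no initial change
        self.scale = alpha / rank

    def forward(self, W):
        # called by parametrize with the original weight; returns the effective weight
        return W + self.scale * (self.B @ self.A)

# usage sketch: freeze the base layer and train only the LoRA factors
layer = nn.Linear(128, 64)
layer.weight.requires_grad_(False)
parametrize.register_parametrization(
    layer, "weight", LoRAParametrization(d_out=64, d_in=128, rank=4))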