Shahul ES shahules786

## preprocessing.py
 def tokenizer(corpus, path, mode="train"):

        """
        corpus : the corpus to tokenize and pad
        path : tokenizer path
        mode : train/test

        """

        if not path.endswith(".pickle"):

## model.py
class TweetModel(nn.Module):
    def __init__(self, embedding_matrix, lstm_hidden_size=200, gru_hidden_size=128):

        super(TweetModel, self).__init__()
        self.embedding = nn.Embedding(*embedding_matrix.shape)
        self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
        self.embedding.weight.requires_grad = True
        self.embedding_dropout = nn.Dropout2d(0.1)

        self.gru = nn.GRU(

## corpus.py
 def preprocess_text(text: list):
     """Split every string in *text* on whitespace to build a corpus.

     Args:
         text: list of strings, one entry per document/sentence.

     Returns:
         A list with one list of tokens per input string, in input order.
         An empty input list yields an empty list.
     """
     # A distinct loop name avoids rebinding the ``text`` parameter, and
     # ``str.split()`` already returns a fresh list, so no extra copy is needed.
     return [sentence.split() for sentence in text]

## matrix.py

    def prepare_matrix(word_index, name, dim):
        """Set up an embedding matrix for the vocabulary in *word_index*.

        Args:
            word_index: mapping of word -> integer index (iterated with ``.items()``).
            name: passed through to ``GloVe`` as its first argument.
            dim: embedding dimension; used for ``GloVe(dim=...)`` and as the
                matrix width.

        NOTE(review): the visible body stops inside the loop -- no row of
        ``embedding_matrix`` is filled and nothing is returned here. This
        looks like an excerpt; confirm against the full source.
        """

        # NOTE(review): presumably a pretrained GloVe vector lookup -- confirm the import.
        embedding_dict = GloVe(name, dim=dim)
        num_words = len(word_index)
        # One extra row beyond the vocabulary size; all rows start as zeros.
        embedding_matrix = np.zeros((num_words + 1, dim))

        for word, i in word_index.items():
            # Skip indices that would fall outside the allocated matrix.
            if i > num_words:
                continue

## train_and_reinit.py
from torch import nn as nn
from transformers import AutoModel, AutoTokenizer


# Load the pretrained base model and wrap it in MyModel (MyModel is not
# defined or imported in the visible source).
# NOTE(review): confirm 'bert-base' is the intended checkpoint identifier.
base_model = AutoModel.from_pretrained('bert-base')
Model = MyModel(base_model)


 def train_and_save():
     """

## llrd.py
def llrd(model, peak_lr, multiplicative_factor):
    """Set a learning rate for each layer and return the parameters.

    Args:
        model: model whose layers should receive per-layer learning rates.
        peak_lr: peak learning rate.
        multiplicative_factor: factor applied between layers.

    Returns:
        Whatever ``get_model_parameters()`` returns.

    NOTE(review): the visible body does not use any of the three arguments
    yet; the per-layer assignment appears to be elided. Confirm against the
    full implementation.
    """
    # The original parameter name contained a space ("multiplicative factor"),
    # which is a SyntaxError; it is spelled with an underscore here.
    return get_model_parameters()


## reinitialize.py
def reinitialize(model, num_layers):
    """Reinitialize the model's weights up to *num_layers*, starting from the bottom layer.

    Args:
        model: model to reinitialize.
        num_layers: number of layers to reinitialize.

    Returns:
        The model object that was passed in.

    NOTE(review): the visible body returns ``model`` unchanged; the actual
    reinitialization logic appears to be elided. Confirm against the full
    implementation.
    """
    return model


# Backward-compatible alias: the function was originally defined under this
# misspelled name while the module-level call site uses ``reinitialize``.
reintialize = reinitialize


# Build the model, then rebind `model` to the result of reinitialize(model, 2)
# (DefinedModel is not defined in the visible source).
model = DefinedModel()
model = reinitialize(model,2)

## gptneox_reward_model.py
@dataclass
class GPTNeoxRMOuptput(ModelOutput):
    """
    Reward model output container.

    Subclasses ``ModelOutput`` and declares a single field, ``logits``.
    """

    # Annotated as a torch.FloatTensor; defaults to None when not provided.
    # NOTE(review): presumably the reward score(s) from the model head -- confirm shape with the producer.
    logits: torch.FloatTensor = None


class GPTNeoXRM(GPTNeoXPreTrainedModel):

## reward_model_loss.py
class RMLoss(nn.Module):
    """Reward-model loss module.

    Only the constructor is visible here; it records the configuration on
    the instance.

    Args:
        reduction: stored as ``self.reduction``; defaults to ``None``.
        beta: stored as ``self.beta``; defaults to ``0.001``.
            NOTE(review): how ``beta`` enters the loss is not visible in
            this excerpt -- confirm against the forward pass.
    """

    def __init__(
        self,
        reduction=None,
        beta=0.001,
    ):
        super().__init__()
        self.reduction = reduction
        # Previously accepted but silently dropped; keep it so the value the
        # caller passed is available on the instance.
        self.beta = beta

## webgpt_reward_model.py

class WebGPT:
    name = "openai/webgpt_comparisons"

    def __init__(self, split: str = "train"):
        super().__init__()
        self.split = split
        dataset = load_dataset(self.name, split=self.split)
        self.dataset_dict = defaultdict(dict)
        for item in dataset:
	def tokenizer(corpus, path, mode="train"):

	"""
	corpus : the corpus to tokenize and pad
	path : tokenizer path
	mode : train/test

	"""

	if not path.endswith(".pickle"):
	class TweetModel(nn.Module):
	def __init__(self, embedding_matrix, lstm_hidden_size=200, gru_hidden_size=128):

	super(TweetModel, self).__init__()
	self.embedding = nn.Embedding(*embedding_matrix.shape)
	self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
	self.embedding.weight.requires_grad = True
	self.embedding_dropout = nn.Dropout2d(0.1)

	self.gru = nn.GRU(
	def preprocess_text(text: list):
		"""Split every string in *text* on whitespace to build a corpus.

		Args:
			text: list of strings, one entry per document/sentence.

		Returns:
			A list with one list of tokens per input string, in input order.
			An empty input list yields an empty list.
		"""
		# Body indentation was lost in this copy; restored here. A distinct
		# loop name also avoids rebinding the ``text`` parameter.
		return [sentence.split() for sentence in text]

	def prepare_matrix(word_index, name, dim):
		"""Set up an embedding matrix for the vocabulary in *word_index*.

		Args:
			word_index: mapping of word -> integer index (iterated with ``.items()``).
			name: passed through to ``GloVe`` as its first argument.
			dim: embedding dimension; used for ``GloVe(dim=...)`` and as the
				matrix width.

		NOTE(review): the visible body stops inside the loop -- no row of
		``embedding_matrix`` is filled and nothing is returned here. This
		looks like an excerpt; confirm against the full source.
		"""
		# Body indentation was lost in this copy; restored here with the
		# statements otherwise unchanged.
		embedding_dict = GloVe(name, dim=dim)
		num_words = len(word_index)
		# One extra row beyond the vocabulary size; all rows start as zeros.
		embedding_matrix = np.zeros((num_words + 1, dim))

		for word, i in word_index.items():
			# Skip indices that would fall outside the allocated matrix.
			if i > num_words:
				continue
	from torch import nn as nn
	from transformers import AutoModel, AutoTokenizer


	# Load the pretrained base model and wrap it in MyModel (MyModel is not
	# defined or imported in the visible source).
	# NOTE(review): confirm 'bert-base' is the intended checkpoint identifier.
	base_model = AutoModel.from_pretrained('bert-base')
	Model = MyModel(base_model)


	def train_and_save():
	"""
	def llrd(model, peak_lr, multiplicative_factor):
		"""Set a learning rate for each layer and return the parameters.

		Args:
			model: model whose layers should receive per-layer learning rates.
			peak_lr: peak learning rate.
			multiplicative_factor: factor applied between layers.

		Returns:
			Whatever ``get_model_parameters()`` returns.

		NOTE(review): the visible body does not use any of the three
		arguments yet; the per-layer assignment appears to be elided.
		"""
		# The original parameter name contained a space ("multiplicative
		# factor"), which is a SyntaxError, and the body had lost its
		# indentation; both are repaired here.
		return get_model_parameters()
	def reinitialize(model, num_layers):
		"""Reinitialize the model's weights up to *num_layers*, starting from the bottom layer.

		Args:
			model: model to reinitialize.
			num_layers: number of layers to reinitialize.

		Returns:
			The model object that was passed in.

		NOTE(review): the visible body returns ``model`` unchanged; the
		actual reinitialization logic appears to be elided.
		"""
		return model


	# Backward-compatible alias: the function was originally defined under
	# this misspelled name while the call site uses ``reinitialize``.
	reintialize = reinitialize


	# Build the model, then rebind `model` to the result of reinitialize(model, 2)
	# (DefinedModel is not defined in the visible source).
	model = DefinedModel()
	model = reinitialize(model,2)
	@dataclass
	class GPTNeoxRMOuptput(ModelOutput):
		"""
		Reward model output container.

		Subclasses ``ModelOutput`` and declares a single field, ``logits``.
		"""

		# Class-body indentation was lost in this copy; restored here.
		# Annotated as a torch.FloatTensor; defaults to None when not provided.
		logits: torch.FloatTensor = None


	class GPTNeoXRM(GPTNeoXPreTrainedModel):
	class RMLoss(nn.Module):
		"""Reward-model loss module.

		Only the constructor is visible here; it records the configuration
		on the instance.

		Args:
			reduction: stored as ``self.reduction``; defaults to ``None``.
			beta: stored as ``self.beta``; defaults to ``0.001``.
				NOTE(review): how ``beta`` enters the loss is not visible in
				this excerpt -- confirm against the forward pass.
		"""

		def __init__(
			self,
			reduction=None,
			beta=0.001,
		):
			super().__init__()
			self.reduction = reduction
			# Previously accepted but silently dropped; keep it so the value
			# the caller passed is available on the instance.
			self.beta = beta

	class WebGPT:
	name = "openai/webgpt_comparisons"

	def __init__(self, split: str = "train"):
	super().__init__()
	self.split = split
	dataset = load_dataset(self.name, split=self.split)
	self.dataset_dict = defaultdict(dict)
	for item in dataset: