Skip to content

Instantly share code, notes, and snippets.

View shahules786's full-sized avatar
👋

Shahul ES shahules786

👋
View GitHub Profile
nli_prompt = RagasPrompt(
instruction="""Natural language inference. Use only "Yes" (1) or "No" (0) as a binary verdict.""",
examples=[
{
"Context":"""John is a student at XYZ University. He is pursuing a degree in Computer Science. He is enrolled in several courses this semester, including Data Structures, Algorithms, and Database Management. John is a diligent student and spends a significant amount of time studying and completing assignments. He often stays late in the library to work on his projects.
statement_1: John is majoring in Biology.
statement_2: John is taking a course on Artificial Intelligence.
statement_3: John is a dedicated student.
statement_4: John has a part-time job.""",
"Answer":"""[
from ragas import evaluate
from datasets import Dataset
import os
os.environ["OPENAI_API_KEY"] = "your-openai-key"
# prepare your huggingface dataset in the format
# Dataset({
# features: ['question','contexts','answer'],
{"input": [{"role": "system", "content": "Complete the phrase as concisely as possible."},
{"role": "user", "content": "ABC AI is a company specializing in ML "}], "ideal": "observability"}
from huggingface_hub import HfApi
import os
import argparse
# Hub client for upload/download operations; presumably authenticates via the
# HF_TOKEN environment variable or a cached login — TODO confirm.
api = HfApi()
# File names that make up a serialized Hugging Face model checkpoint
# (tokenizer artifacts + config + PyTorch weights) to be transferred via the API.
model_files = ["tokenizer.json","special_tokens_map.json","tokenizer_config.json","config.json","pytorch_model.bin"]
if __name__ == "__main__":
# Collator that formats and tokenizes reward-model training examples.
# NOTE(review): the rest of this class (process_example, __call__?) is not
# visible in this fragment — documented from the visible fields only.
@dataclass
class RMDataCollator:
# Tokenizer used to encode the formatted text; any HF PreTrainedTokenizer.
tokenizer: PreTrainedTokenizer
# Upper bound on the tokenized sequence length — presumably used to truncate
# or pad in the (unseen) processing methods; TODO confirm.
max_length: int = 512
def format_example(self, example, eos, prompt=False):
    """Wrap *example* between a role-specific special token and the EOS marker.

    The prompter token is used when ``prompt`` is True; otherwise the
    assistant token. Returns the concatenated string.
    """
    role = "prompter" if prompt else "assistant"
    return f"{SPECIAL_TOKENS[role]}{example}{eos}"
def process_example(self, example):
class WebGPT:
name = "openai/webgpt_comparisons"
def __init__(self, split: str = "train"):
super().__init__()
self.split = split
dataset = load_dataset(self.name, split=self.split)
self.dataset_dict = defaultdict(dict)
for item in dataset:
class RMLoss(nn.Module):
    """Reward-model training loss.

    Parameters
    ----------
    reduction : str or None
        How to reduce the per-example loss (e.g. "mean"/"sum") — the forward
        pass is not visible in this fragment, so exact semantics are
        presumably torch-style; TODO confirm.
    beta : float
        Scaling/temperature coefficient used by the loss computation.
    """

    def __init__(
        self,
        reduction=None,
        beta=0.001,
    ):
        super().__init__()
        self.reduction = reduction
        # Fix: `beta` was accepted but never stored, so the (unseen) forward
        # pass could never use it and the argument was silently ignored.
        self.beta = beta
# NOTE(review): class name misspells "Output" as "Ouptput"; kept as-is because
# external callers may import it under this spelling.
@dataclass
class GPTNeoxRMOuptput(ModelOutput):
"""
Reward Model Output

Lightweight container (a transformers-style ModelOutput) holding the reward
model's raw scores.
"""
# Raw reward score tensor produced by the model head; shape/dtype beyond
# torch.FloatTensor is not visible in this fragment — TODO confirm.
logits: torch.FloatTensor = None
class GPTNeoXRM(GPTNeoXPreTrainedModel):
def reintialize(model,num_layers):
"""Reinitialize the model's weights up to ``num_layers``, counting from the bottom layer.

NOTE(review): the function name misspells "reinitialize" — callers must use
this spelling until it is fixed everywhere at once.
NOTE(review): the body is a stub — it returns ``model`` unchanged and no
weights are actually reinitialized; implementation pending.
"""
return model
# Demo usage: build a model and reinitialize its bottom 2 layers.
model = DefinedModel()
# Fix: this line previously called `reinitialize(...)`, but the function
# defined above is spelled `reintialize` (sic) — the mismatch raised a
# NameError at runtime. The call now matches the defined name.
model = reintialize(model,2)
def llrd(model, peak_lr, multiplicative_factor):
    """Layer-wise learning-rate decay (LLRD): assign a learning rate to each
    layer group and return the resulting parameter groups.

    Fix: the original signature had ``multiplicative factor`` (with a space)
    as the third parameter — a SyntaxError; renamed to
    ``multiplicative_factor``.

    NOTE(review): the body is a stub — ``peak_lr`` and
    ``multiplicative_factor`` are not yet used, and ``get_model_parameters``
    is defined elsewhere. Presumably each deeper layer's LR is scaled by
    ``multiplicative_factor`` from ``peak_lr`` — TODO confirm when implemented.
    """
    parameters = get_model_parameters()
    return parameters