Sayantan Das (ucalyptus2), Canada
# pip install "distilabel[vllm]>=1.1.1"
# pip install flash-attn --no-build-isolation
# huggingface-cli login
import time
from distilabel.llms import vLLM
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns, LoadHubDataset
from distilabel.steps.tasks import PrometheusEval
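
The preview stops at the imports. A minimal sketch of how these pieces typically fit together in distilabel 1.1.x, assuming the prometheus-7b-v2.0 judge model, absolute grading, and illustrative dataset/column names (none of these are confirmed by the preview):

# A sketch, not the gist's actual pipeline; model, rubric, and column names are assumptions.
with Pipeline(name="prometheus-eval") as pipeline:
    load_dataset = LoadHubDataset(
        name="load_dataset",
        output_mappings={"prompt": "instruction", "completion": "generation"},
    )
    prometheus = PrometheusEval(
        name="prometheus",
        llm=vLLM(
            model="prometheus-eval/prometheus-7b-v2.0",
            chat_template="[INST] {{ messages[0]['content'] }} [/INST]",  # Mistral-style template; an assumption
        ),
        mode="absolute",            # grade each response on its own
        rubric="factual-validity",  # one of the built-in rubrics
    )
    keep_columns = KeepColumns(
        name="keep_columns",
        columns=["instruction", "generation", "feedback", "result"],
    )
    load_dataset.connect(prometheus)
    prometheus.connect(keep_columns)

if __name__ == "__main__":
    start = time.time()
    distiset = pipeline.run(
        parameters={"load_dataset": {"repo_id": "HuggingFaceH4/instruction-dataset", "split": "test"}}
    )
    print(f"Pipeline ran in {time.time() - start:.2f}s")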
ArthurZucker / mamba_peft.py
Created March 7, 2024 09:32
Mamba peft finetuning
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
tokenizer = AutoTokenizer.from_pretrained("state-spaces/mamba-130m-hf")
model = AutoModelForCausalLM.from_pretrained("state-spaces/mamba-130m-hf")
dataset = load_dataset("Abirate/english_quotes", split="train")
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,
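    # The preview truncates inside TrainingArguments. What follows is a plausible
    # completion modeled on the Mamba PEFT example in the transformers docs;
    # the hyperparameters are assumptions, not the gist's originals.
    per_device_train_batch_size=4,
    logging_steps=10,
    learning_rate=2e-3,
)
lora_config = LoraConfig(
    r=8,
    target_modules=["x_proj", "embeddings", "in_proj", "out_proj"],  # Mamba projection layers
    task_type="CAUSAL_LM",
    bias="none",
)
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=lora_config,
    train_dataset=dataset,
    dataset_text_field="quote",  # the text column in Abirate/english_quotes
)
trainer.train()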
vikhyat / gist:e59cc7dce7f3802af0680e9d0d1e4bae
Created December 8, 2023 19:29
mixtral layers and shapes
tok_embeddings.weight torch.Size([32000, 4096])
norm.weight torch.Size([4096])
output.weight torch.Size([32000, 4096])
layers.0.attention_norm.weight torch.Size([4096])
layers.0.attention.wq.weight torch.Size([4096, 4096])
layers.0.attention.wk.weight torch.Size([1024, 4096])
layers.0.attention.wv.weight torch.Size([1024, 4096])
layers.0.attention.wo.weight torch.Size([4096, 4096])
layers.0.feed_forward.gate.weight torch.Size([8, 4096])
layers.0.ffn_norm.weight torch.Size([4096])
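
Even truncated, the dump shows the two signatures of the Mixtral-8x7B architecture: grouped-query attention (wk and wv project to 1024 dims, i.e. 8 KV heads x 128, versus 4096 for the 32 query heads in wq) and mixture-of-experts routing (feed_forward.gate maps each 4096-dim token to scores over 8 experts). A sketch that reproduces such a dump from a local checkpoint (the shard filename is an assumption):

import torch

# Hypothetical path; the Mistral reference release ships weights as consolidated .pth shards.
state_dict = torch.load("consolidated.00.pth", map_location="cpu")
for name, tensor in state_dict.items():
    print(name, tensor.shape)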
mlabonne / finetune_llama2.py
Last active July 18, 2024 02:12
Easy Llama 2 fine-tuning script (📝 Article: https://tinyurl.com/finetunellama2)
# Based on younesbelkada/finetune_llama_v2.py
# Install the following libraries:
# pip install accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7 scipy
from dataclasses import dataclass, field
from typing import Optional
import torch
from datasets import load_dataset
from transformers import (
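    # The preview cuts off mid-import. Scripts of this QLoRA recipe typically
    # continue along these lines (an educated guess, not the gist's verbatim text):
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
)
from peft import LoraConfig
from trl import SFTTrainer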
younesbelkada / finetune_llama_v2.py
Last active July 12, 2024 06:54
Fine tune Llama v2 models on Guanaco Dataset
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
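
Only the license header survives the preview. Per the description, the script fine-tunes Llama 2 on the Guanaco dataset with QLoRA; a minimal sketch of that recipe, with the commonly used model, dataset, and hyperparameters assumed rather than copied from the gist:

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

# 4-bit NF4 quantization so the 7B model fits on a single consumer GPU.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf", quantization_config=bnb_config, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token = tokenizer.eos_token

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
peft_config = LoraConfig(r=64, lora_alpha=16, lora_dropout=0.1, task_type="CAUSAL_LM")

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=TrainingArguments(output_dir="./results", per_device_train_batch_size=4, max_steps=500),
)
trainer.train()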
cloneofsimo / flash.py
Created June 22, 2023 07:51
FlashAttention comparison
import pytest
import torch
import triton
import triton.language as tl
@triton.jit
def _fwd_kernel(
    Q, K, V, sm_scale,
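    # ... (remaining kernel arguments and body truncated in the preview) ...

# The "comparison" half of a script like this is usually a correctness check of
# the fused kernel against naive attention. A sketch of that harness, assuming
# the gist wraps the kernel in a callable flash_attention(q, k, v, sm_scale)
# (a hypothetical name, not confirmed by the preview):
def reference_attention(q, k, v, sm_scale):
    # Naive O(N^2) attention as ground truth.
    scores = torch.matmul(q, k.transpose(-2, -1)) * sm_scale
    probs = torch.softmax(scores.float(), dim=-1).to(q.dtype)
    return torch.matmul(probs, v)

q = torch.randn(4, 8, 1024, 64, device="cuda", dtype=torch.float16)
k, v = torch.randn_like(q), torch.randn_like(q)
sm_scale = q.shape[-1] ** -0.5
# torch.testing.assert_close(flash_attention(q, k, v, sm_scale),
#                            reference_attention(q, k, v, sm_scale),
#                            rtol=0, atol=1e-2)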
danielgross / mathpix2gpt.py
Last active July 11, 2024 15:11
mathpix2gpt.py
import requests
import time
import os
import sys
import openai
import tiktoken
from termcolor import colored
openai.api_key = open(os.path.expanduser('~/.openai')).read().strip()
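
After reading the key, the script presumably OCRs input via the Mathpix API and forwards the extracted text to GPT. A sketch of the GPT half against the openai 0.x client this snippet targets (the function name, model choice, and message format are assumptions):

# Hypothetical continuation: count tokens, then send a Mathpix-extracted blob to the chat API.
def ask_gpt(text, model="gpt-4"):
    enc = tiktoken.encoding_for_model(model)
    print(colored(f"{len(enc.encode(text))} prompt tokens", "yellow"))
    resp = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": text}],
    )
    return resp["choices"][0]["message"]["content"]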
from langchain.llms import Anthropic
from langchain.agents import load_tools, initialize_agent
from langchain.tools import AIPluginTool
PREFIX = """\n\nHuman: Answer the following questions as best you can. You have access to the following tools:"""
SUFFIX = """Begin!
Question: {input}
\n\nAssistant:
Thought:{agent_scratchpad}"""
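
With the Claude-flavored prefix and suffix in place, the remaining wiring in this era of LangChain would look roughly like the following (the model name, tool list, and plugin URL are assumptions; the Klarna plugin is the stock example from the LangChain docs):

llm = Anthropic(model="claude-v1")
tools = load_tools(["requests_all"])
tools.append(AIPluginTool.from_plugin_url("https://www.klarna.com/.well-known/ai-plugin.json"))

agent = initialize_agent(
    tools,
    llm,
    agent="zero-shot-react-description",
    agent_kwargs={"prefix": PREFIX, "suffix": SUFFIX},
    verbose=True,
)
agent.run("what t-shirts are available on Klarna?")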
# STEP 1: Load
# Load documents using LangChain's DocumentLoaders
# This is from https://langchain.readthedocs.io/en/latest/modules/document_loaders/examples/csv.html
from langchain.document_loaders.csv_loader import CSVLoader
loader = CSVLoader(file_path='./example_data/mlb_teams_2012.csv')
data = loader.load()
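
Step 1 yields one Document per CSV row. The usual continuation of this LangChain recipe splits, embeds, and indexes those documents; a sketch, assuming OpenAI embeddings and a Chroma vector store (neither is confirmed by the preview):

# STEP 2: Split, embed, and index the loaded documents.
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

splits = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0).split_documents(data)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
retriever = vectorstore.as_retriever()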
ultranity / Slurm-sbatch-email-with-output.sh
Created August 9, 2022 14:24
Slurm-sbatch-email-with-output
#!/bin/bash
#SBATCH -J MyModel
#SBATCH -n 1 # Number of cores
#SBATCH -t 1-00:00 # Runtime in D-HH:MM
#SBATCH -o JOB%j.out # File to which STDOUT will be written
#SBATCH -e JOB%j.out # File to which STDERR will be written
#SBATCH --mail-type=BEGIN
#SBATCH --mail-user=my@email.com
secs_to_human(){
    # Convert seconds to H:MM:SS. The preview truncates here; this body is a
    # common implementation, not necessarily the gist's original.
    echo "$(( ${1} / 3600 )):$(printf '%02d' $(( (${1} / 60) % 60 ))):$(printf '%02d' $(( ${1} % 60 )))"
}