Turning out data tricks since 2006! thistleknot

## parse_json.py
import json

def load_json_from_disk(file_path):
    """
    Load JSON data from disk.

    Parameters:
    - file_path (str): The path to the JSON file.

    Returns:

## loreft.py
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from pyreft import ReftConfig, ReftTrainerForCausalLM, get_reft_model, ReftSupervisedDataset, ReftDataCollator, LoreftIntervention

import torch

import pyreft
from datasets import load_dataset


## script.py
#for data txt files see: https://github.com/TheCynosure/smmry_impl
#example use
"""
Search_web("history of Taco Tuesday")
Tell me about this.
"""
# Get Google API keys here:
# https://console.cloud.google.com/apis/dashboard
# https://programmablesearchengine.google.com/controlpanel/all
# Could be retooled quite easily to use duckduckgo_search rather than Google, so you don't have to mess with getting API keys.

## yahoo_finance.py
def get_v1_url(symbol, period_type, crumb):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
    }

    period1 = 493590046
    period2 = 1913180947

## minimum nanogpt mamba
import torch
import torch.nn as nn
from torch.nn import  functional as F
from torch.nn.parameter import Parameter
from tqdm import tqdm
from mamba_ssm import Mamba
#hyperparams
epochs = 100
lr = 1e-3
batch_size = 64

## train_mamba.py
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
import wandb
from datasets import load_dataset
import torch
import os
import argparse
import numpy as np
import pandas as pd
from transformers import EvalPrediction
from torch.utils.data import DataLoader

## efficient_batching_v2.py
# This method deducts from the list passed in, splitting the records between a sample and a remainder.
# Samples are always 100% full of data until no more samples can be extracted, at which point an empty sample is returned along with the remainder [the remainder is to be folded into a new iteration].

# Function to find the combination of values that adds up to the target sum
def find_combination_to_sum(counts, target):
    #print("Target inside function (find_combination_to_sum):", target)
    values = []
    for val, count in counts.items():
        #print(f"Value (val): {val}, Type: {type(val)}")
        #print(f"Count: {count}, Type: {type(count)}")

## GPT-Neo_Classify.py
from transformers import GPT2Tokenizer, GPTNeoForCausalLM
import torch
import torch.nn.functional as F

# Load the GPT-Neo 1.3B model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")

# Your question and prompt
question = "Is a bird a mammal?"

## gpt_classify.py
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F

# Load the GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Your question and prompt
question = "Is a bird a mammal?"

## optimal_cut.py
#!pip install --upgrade numpy
!pip install numpy==1.24


from datasets import load_dataset
from pyod.models.knn import KNN
from pyod.models.knn import KNN   # Example: You can use K-Nearest Neighbors as an ECOD model
from scipy import stats
from scipy.interpolate import UnivariateSpline
from scipy.stats import gaussian_kde
	import json

	def load_json_from_disk(file_path):
	"""
	Load JSON data from disk.

	Parameters:
	- file_path (str): The path to the JSON file.

	Returns:
	import transformers
	from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
	from pyreft import ReftConfig, ReftTrainerForCausalLM, get_reft_model, ReftSupervisedDataset, ReftDataCollator, LoreftIntervention

	import torch

	import pyreft
	from datasets import load_dataset
	#for data txt files see: https://github.com/TheCynosure/smmry_impl
	#example use
	"""
	Search_web("history of Taco Tuesday")
	Tell me about this.
	"""
	# Get Google API keys here:
	# https://console.cloud.google.com/apis/dashboard
	# https://programmablesearchengine.google.com/controlpanel/all
	# Could be retooled quite easily to use duckduckgo_search rather than Google, so you don't have to mess with getting API keys.
	def get_v1_url(symbol, period_type, crumb):
	headers = {
	'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,/;q=0.8',
	'Accept-Language': 'en-US,en;q=0.5',
	}

	period1 = 493590046
	period2 = 1913180947
	import torch
	import torch.nn as nn
	from torch.nn import functional as F
	from torch.nn.parameter import Parameter
	from tqdm import tqdm
	from mamba_ssm import Mamba
	#hyperparams
	epochs = 100
	lr = 1e-3
	batch_size = 64
	from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
	import wandb
	from datasets import load_dataset
	import torch
	import os
	import argparse
	import numpy as np
	import pandas as pd
	from transformers import EvalPrediction
	from torch.utils.data import DataLoader
	# This method deducts from the list passed in, splitting the records between a sample and a remainder.
	# Samples are always 100% full of data until no more samples can be extracted, at which point an empty sample is returned along with the remainder [the remainder is to be folded into a new iteration].

	# Function to find the combination of values that adds up to the target sum
	def find_combination_to_sum(counts, target):
	#print("Target inside function (find_combination_to_sum):", target)
	values = []
	for val, count in counts.items():
	#print(f"Value (val): {val}, Type: {type(val)}")
	#print(f"Count: {count}, Type: {type(count)}")
	from transformers import GPT2Tokenizer, GPTNeoForCausalLM
	import torch
	import torch.nn.functional as F

	# Load the GPT-Neo 1.3B model and tokenizer
	tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B")
	model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B")

	# Your question and prompt
	question = "Is a bird a mammal?"
	#!pip install --upgrade numpy
	!pip install numpy==1.24


	from datasets import load_dataset
	from pyod.models.knn import KNN
	from pyod.models.knn import KNN # Example: You can use K-Nearest Neighbors as an ECOD model
	from scipy import stats
	from scipy.interpolate import UnivariateSpline
	from scipy.stats import gaussian_kde