Turning out data tricks since 2006! thistleknot

## gpt_classify.py
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch
import torch.nn.functional as F

# Load the GPT-2 model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Your question and prompt
question = "Is a bird a mammal?"

## optimal_cut.py
#!pip install --upgrade numpy
!pip install numpy==1.24


from datasets import load_dataset
from pyod.models.knn import KNN
from pyod.models.knn import KNN   # Example: K-Nearest Neighbors outlier detector (a separate PyOD model from ECOD, which is pyod.models.ecod.ECOD)
from scipy import stats
from scipy.interpolate import UnivariateSpline
from scipy.stats import gaussian_kde

## ECOD_KDE_95_pct_quotes.py
#!/usr/bin/env python
# coding: utf-8

# In[3]:

#!pip install --upgrade numpy
get_ipython().system('pip install numpy==1.24')

# In[4]:

## efficient_batching.py
def create_batches(records, block_size, num_batches, eos_token_id):
    random.shuffle(records)

    # Adding eos_token_id to each record and then checking if it fits in the block
    available_records = [[i, record + [eos_token_id]] for i, record in enumerate(records) if len(record) + 1 <= block_size]

    def fill_sequence(sequence, available_records, space_avail):
        if not available_records or space_avail <= 0:
            return sequence, available_records, space_avail

## mamba_attn_fff.py
#https://gist.githubusercontent.com/thistleknot/raw/mamba_trainer.py
#SimplerMambaSSM
#https://colab.research.google.com/drive/1g9qpeVcFa0ca0cnhmqusO4RZtQdh9umY#scrollTo=2lECw6S4N7cn

#!pip install mamba-ssm causal-conv1d

#resources
#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
#https://github.com/havenhq/mamba-chat/blob/main/trainer/mamba_trainer.py
#https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py

## mamba_trainer.py
#https://gist.githubusercontent.com/thistleknot/raw/mamba_trainer.py

#!pip install mamba-ssm causal-conv1d

#resources
#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
#https://github.com/havenhq/mamba-chat/blob/main/trainer/mamba_trainer.py
#https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py
#https://github.com/state-spaces/mamba/blob/main/mamba_ssm/modules/mamba_simple.py
#https://huggingface.co/clibrain/mamba-2.8b-instruct-openhermes

## tfidf_summ.py
from textblob import TextBlob
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import scipy.stats as stats

original_text = [
"Don't incur technical debt, fully define what is proposed.",
"Prefer O'Reilly style writing using examples of time-tested failproof boilerplate solutions with docstring comments.",
"Assume user's expertise: Masters in Data Science, Classical Philosophy, and proficiency in AI, Python, SQL.",
"Always deliver production ready code.",

## Mamba-gpt-w-sub-word.py
# -*- coding: utf-8 -*-
"""SimplerMambaSSM.ipynb
Automatically generated by Colaboratory.
#pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
Original file is located at
    https://colab.research.google.com/drive/1g9qpeVcFa0ca0cnhmqusO4RZtQdh9umY
"""
#!pip install mamba-ssm causal-conv1d
#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
#!mkdir differentattention

## mamba-gpt.py
# -*- coding: utf-8 -*-
"""SimplerMambaSSM.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1g9qpeVcFa0ca0cnhmqusO4RZtQdh9umY
"""

#!pip install mamba-ssm causal-conv1d

## llm inference.py
#export LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib/

from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from datasets import load_dataset
import json
import torch
from tqdm import tqdm

if torch.cuda.is_available():
	from transformers import GPT2Tokenizer, GPT2LMHeadModel
	import torch
	import torch.nn.functional as F

	# Load the GPT-2 model and tokenizer
	tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
	model = GPT2LMHeadModel.from_pretrained("gpt2")

	# Your question and prompt
	question = "Is a bird a mammal?"
	#!pip install --upgrade numpy
	!pip install numpy==1.24


	from datasets import load_dataset
	from pyod.models.knn import KNN
	from pyod.models.knn import KNN # Example: You can use K-Nearest Neighbors as an ECOD model
	from scipy import stats
	from scipy.interpolate import UnivariateSpline
	from scipy.stats import gaussian_kde
	#!/usr/bin/env python
	# coding: utf-8

	# In[3]:

	#!pip install --upgrade numpy
	get_ipython().system('pip install numpy==1.24')

	# In[4]:
	def create_batches(records, block_size, num_batches, eos_token_id):
	random.shuffle(records)

	# Adding eos_token_id to each record and then checking if it fits in the block
	available_records = [[i, record + [eos_token_id]] for i, record in enumerate(records) if len(record) + 1 <= block_size]

	def fill_sequence(sequence, available_records, space_avail):
	if not available_records or space_avail <= 0:
	return sequence, available_records, space_avail
	#https://gist.githubusercontent.com/thistleknot/raw/mamba_trainer.py
	#SimplerMambaSSM
	#https://colab.research.google.com/drive/1g9qpeVcFa0ca0cnhmqusO4RZtQdh9umY#scrollTo=2lECw6S4N7cn

	#!pip install mamba-ssm causal-conv1d

	#resources
	#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
	#https://github.com/havenhq/mamba-chat/blob/main/trainer/mamba_trainer.py
	#https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py
	from textblob import TextBlob
	from sklearn.feature_extraction.text import TfidfVectorizer
	import numpy as np
	import scipy.stats as stats

	original_text = [
	"Don't incur technical debt, fully define what is proposed.",
	"Prefer O'Reilly style writing using examples of time-tested failproof boilerplate solutions with docstring comments.",
	"Assume user's expertise: Masters in Data Science, Classical Philosophy, and proficiency in AI, Python, SQL.",
	"Always deliver production ready code.",
	# -- coding: utf-8 --
	"""SimplerMambaSSM.ipynb
	Automatically generated by Colaboratory.
	#pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu121
	Original file is located at
	https://colab.research.google.com/drive/1g9qpeVcFa0ca0cnhmqusO4RZtQdh9umY
	"""
	#!pip install mamba-ssm causal-conv1d
	#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
	#!mkdir differentattention
	#export LD_LIBRARY_PATH=/usr/local/cuda/targets/x86_64-linux/lib/

	from transformers import AutoTokenizer, AutoModelForCausalLM
	from transformers import BitsAndBytesConfig
	from datasets import load_dataset
	import json
	import torch
	from tqdm import tqdm

	if torch.cuda.is_available():