This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#This method deducts from the list sent in (splitting the records between sample and remainder). | |
#Always 100% full of data until no more samples can be extracted where an empty sample along with the remainder are returned [where the remainder is to be folded into a new iteration] | |
# Function to find the combination of values that adds up to the target sum | |
def find_combination_to_sum(counts, target): | |
#print("Target inside function (find_combination_to_sum):", target) | |
values = [] | |
for val, count in counts.items(): | |
#print(f"Value (val): {val}, Type: {type(val)}") | |
#print(f"Count: {count}, Type: {type(count)}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import GPT2Tokenizer, GPTNeoForCausalLM | |
import torch | |
import torch.nn.functional as F | |
# Load the GPT-Neo 1.3B model and tokenizer | |
tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-1.3B") | |
model = GPTNeoForCausalLM.from_pretrained("EleutherAI/gpt-neo-1.3B") | |
# Your question and prompt | |
question = "Is a bird a mammal?" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import GPT2Tokenizer, GPT2LMHeadModel | |
import torch | |
import torch.nn.functional as F | |
# Load the GPT-2 model and tokenizer | |
tokenizer = GPT2Tokenizer.from_pretrained("gpt2") | |
model = GPT2LMHeadModel.from_pretrained("gpt2") | |
# Your question and prompt | |
question = "Is a bird a mammal?" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!pip install --upgrade numpy | |
!pip install numpy==1.24 | |
from datasets import load_dataset | |
from pyod.models.knn import KNN | |
from pyod.models.knn import KNN # Example: You can use K-Nearest Neighbors as an ECOD model | |
from scipy import stats | |
from scipy.interpolate import UnivariateSpline | |
from scipy.stats import gaussian_kde |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_batches(records, block_size, num_batches, eos_token_id): | |
random.shuffle(records) | |
# Adding eos_token_id to each record and then checking if it fits in the block | |
available_records = [[i, record + [eos_token_id]] for i, record in enumerate(records) if len(record) + 1 <= block_size] | |
def fill_sequence(sequence, available_records, space_avail): | |
if not available_records or space_avail <= 0: | |
return sequence, available_records, space_avail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import argparse | |
from transformers import MistralConfig, AutoModelForCausalLM | |
import torch | |
import sys | |
import os | |
def calculate_model_parameters(config): | |
# Load the model configuration from the JSON file | |
# Extract the necessary values from the configuration |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[3]: | |
#!pip install --upgrade numpy | |
get_ipython().system('pip install numpy==1.24') | |
# In[4]: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://gist.githubusercontent.com/thistleknot/raw/mamba_trainer.py | |
#SimplerMambaSSM | |
#https://colab.research.google.com/drive/1g9qpeVcFa0ca0cnhmqusO4RZtQdh9umY#scrollTo=2lECw6S4N7cn | |
#!pip install mamba-ssm causal-conv1d | |
#resources | |
#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt | |
#https://github.com/havenhq/mamba-chat/blob/main/trainer/mamba_trainer.py | |
#https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#https://gist.githubusercontent.com/thistleknot/raw/mamba_trainer.py | |
#!pip install mamba-ssm causal-conv1d | |
#resources | |
#!wget https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt | |
#https://github.com/havenhq/mamba-chat/blob/main/trainer/mamba_trainer.py | |
#https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py | |
#https://github.com/state-spaces/mamba/blob/main/mamba_ssm/modules/mamba_simple.py | |
#https://huggingface.co/clibrain/mamba-2.8b-instruct-openhermes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from textblob import TextBlob | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
import numpy as np | |
import scipy.stats as stats | |
original_text = [ | |
"Don't incur technical debt, fully define what is proposed.", | |
"Prefer O'Reilly style writing using examples of time-tested failproof boilerplate solutions with docstring comments.", | |
"Assume user's expertise: Masters in Data Science, Classical Philosophy, and proficiency in AI, Python, SQL.", | |
"Always deliver production ready code.", |