@bigsnarfdude
bigsnarfdude / output_check.py
Created April 15, 2024 16:58
head2head_output_check.py
import os
import pickle
from contextlib import nullcontext
import torch
import tiktoken
from model import GPTConfig, GPT
import datasets
import numpy as np
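The preview stops at the imports. For reference, loading a checkpoint with these imports usually follows nanoGPT's sample.py pattern; the checkpoint path and dict keys below are assumptions from that convention, not taken from the gist:

import torch
from model import GPTConfig, GPT

ckpt_path = 'out/ckpt.pt'  # assumed nanoGPT default, not from the gist
checkpoint = torch.load(ckpt_path, map_location='cpu')
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)
state_dict = checkpoint['model']
# checkpoints saved from a torch.compile'd model carry this key prefix
unwanted_prefix = '_orig_mod.'
for k in list(state_dict):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict)
model.eval()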
@bigsnarfdude
bigsnarfdude / perplexity.ipynb
Created April 13, 2024 18:28
perplexity.ipynb
@bigsnarfdude
bigsnarfdude / testing_perplexity.py
Last active April 13, 2024 17:45
testing_perplexity.py
# https://huggingface.co/docs/transformers/perplexity
import datasets
import numpy as np
import torch
from torch.nn import CrossEntropyLoss
from transformers import AutoModelForCausalLM, AutoTokenizer
import evaluate
from evaluate import logging
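These are the imports for the Hugging Face perplexity recipe linked above. A minimal, self-contained version of that computation (the model name and sample text are placeholders; for long inputs the linked doc uses a sliding window rather than one forward pass):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained('gpt2')
tokenizer = AutoTokenizer.from_pretrained('gpt2')
model.eval()

text = 'The quick brown fox jumps over the lazy dog.'
enc = tokenizer(text, return_tensors='pt')

with torch.no_grad():
    # with labels == input_ids the model shifts targets internally and
    # returns the mean cross-entropy over the predicted tokens
    out = model(enc.input_ids, labels=enc.input_ids)

ppl = torch.exp(out.loss)  # perplexity = exp(mean NLL)
print(f'perplexity: {ppl.item():.2f}')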
@bigsnarfdude
bigsnarfdude / training_data_bin_builder.py
Last active April 11, 2024 22:45
training_data_bin_builder.py
import numpy as np
import glob
import os
def read_directory(directory_path):
    file_paths = glob.glob(os.path.join(directory_path, '*.bin'))
    file_paths = [file for file in file_paths if os.path.basename(file) != 'train.bin']
    return file_paths

def batch_group_files(file_list, batch_size):
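    # The gist preview cuts off here. The body below is a guess at a minimal
    # implementation, not the gist's actual code: yield the file list in
    # consecutive chunks of batch_size.
    for i in range(0, len(file_list), batch_size):
        yield file_list[i:i + batch_size]
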
#!/bin/bash
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
git lfs install
git clone https://huggingface.co/datasets/cerebras/SlimPajama-627B
pip install datasets
@bigsnarfdude
bigsnarfdude / prepare.py
Created April 6, 2024 17:08
SlimPajama dataset into numpy memmaps
import os
import json
import time
import glob
import jsonlines
import tiktoken
import numpy as np
import zstandard as zstd
from tqdm import tqdm
from io import StringIO
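A minimal sketch of the pipeline these imports suggest: stream one SlimPajama .jsonl.zst shard, tokenize with tiktoken's GPT-2 encoding, and write the token ids into a numpy memmap. The 'text' field and uint16 dtype are assumptions (consistent with SlimPajama's schema and GPT-2's ~50k vocab), and the paths are placeholders:

import io
import json
import numpy as np
import tiktoken
import zstandard as zstd

enc = tiktoken.get_encoding('gpt2')

def tokenize_shard(shard_path, out_path):
    # decompress the shard and collect GPT-2 token ids, one document at a time
    tokens = []
    with open(shard_path, 'rb') as f:
        reader = zstd.ZstdDecompressor().stream_reader(f)
        for line in io.TextIOWrapper(reader, encoding='utf-8'):
            doc = json.loads(line)
            ids = enc.encode_ordinary(doc['text'])
            ids.append(enc.eot_token)  # end-of-text separator between documents
            tokens.extend(ids)
    # write the ids to disk as a uint16 memmap (GPT-2 ids fit in 16 bits)
    arr = np.memmap(out_path, dtype=np.uint16, mode='w+', shape=(len(tokens),))
    arr[:] = tokens
    arr.flush()

tokenize_shard('example_train.jsonl.zst', 'example_train.bin')  # placeholder paths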
@bigsnarfdude
bigsnarfdude / gist:de53d7d25c5c8ffb81dfa938f79c5df0
Last active April 4, 2024 01:32
download SlimPajama and Falcon
Size     Training Tokens  Layers  Hidden Size  Attention Heads  Context Length
OLMo 1B  3 Trillion       16      2048         16               2048
https://huggingface.co/allenai/OLMo-1B
git lfs install
git clone https://huggingface.co/datasets/cerebras/SlimPajama-627B
git lfs install
git clone https://huggingface.co/datasets/tiiuae/falcon-refinedweb
@bigsnarfdude
bigsnarfdude / estimates.ipynb
Last active April 2, 2024 00:35
gpt2 or tinyllama: estimated A100 time needed to train the model: 43.31 days
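The notebook itself doesn't render here, but figures like 43.31 days are what the standard 6*N*D FLOPs rule of thumb produces. A sketch of that estimate; the parameter count, token count, GPU count, and MFU below are illustrative values, not the notebook's:

def train_days(n_params, n_tokens, n_gpus=8, peak_flops=312e12, mfu=0.4):
    # total training compute ~ 6 * params * tokens (forward + backward)
    total_flops = 6 * n_params * n_tokens
    # sustained throughput: per-GPU bf16 peak (A100: 312 TFLOPs) times MFU
    flops_per_sec = n_gpus * peak_flops * mfu
    return total_flops / flops_per_sec / 86400

# e.g. a 1.1B-param model on 300B tokens across 8 A100s -> ~23 days
print(f'{train_days(1.1e9, 300e9):.2f} days')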
@bigsnarfdude
bigsnarfdude / gist:f8ee49ba6165e815facf187fdf9f783e
Last active April 3, 2024 17:23
tinyLlama but just using gpt2-large experiments
Phi-2
> 24 layers
250 Billion Tokens
96 X A100
14 Days
Model Name   nparams  nlayers  dmodel  nheads  dhead  Batch Size  Learning Rate
GPT-3 Small  125M     12       768     12      64     0.5M        6.0 × 10^-4
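Those GPT-3 Small dimensions map directly onto nanoGPT's GPTConfig (the same class imported in output_check.py above). block_size and vocab_size are my additions, since the table doesn't give them:

from model import GPTConfig  # nanoGPT

config = GPTConfig(
    n_layer=12,   # nlayers from the table
    n_head=12,    # nheads
    n_embd=768,   # dmodel (dhead = 768 / 12 = 64, matching the table)
    block_size=2048,   # assumed: GPT-3 used a 2048-token context
    vocab_size=50304,  # assumed: nanoGPT's padded GPT-2 vocab
)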
@bigsnarfdude
bigsnarfdude / lambda_processing_h100.py
Created March 30, 2024 19:39
lambda_processing_h100.py
import os
from openai import OpenAI
import prompt
def truncate_words(input_string, max_words):
    words = input_string.split()
    truncated_words = words[:max_words]
    return ' '.join(truncated_words)
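A hypothetical way these pieces fit together: truncate a long input with truncate_words, then send it through the OpenAI client (the model name and file path are placeholders; the client reads OPENAI_API_KEY from the environment):

client = OpenAI()
text = truncate_words(open('transcript.txt').read(), 3000)
resp = client.chat.completions.create(
    model='gpt-4o-mini',
    messages=[{'role': 'user', 'content': f'Summarize:\n{text}'}],
)
print(resp.choices[0].message.content)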