@bigsnarfdude
bigsnarfdude / output_check.py
Created April 15, 2024 16:58
head2head_output_check.py
import os
import pickle
from contextlib import nullcontext
import torch
import tiktoken
from model import GPTConfig, GPT
import datasets
import numpy as np
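The preview stops at the imports. For reference, loading a checkpoint with these imports usually follows nanoGPT's sample.py pattern; the checkpoint path and dict keys below are assumptions from that convention, not taken from the gist:

import torch
from model import GPTConfig, GPT

ckpt_path = 'out/ckpt.pt'  # assumed nanoGPT default, not from the gist
checkpoint = torch.load(ckpt_path, map_location='cpu')
gptconf = GPTConfig(**checkpoint['model_args'])
model = GPT(gptconf)
state_dict = checkpoint['model']
# checkpoints saved from a torch.compile'd model carry this key prefix
unwanted_prefix = '_orig_mod.'
for k in list(state_dict):
    if k.startswith(unwanted_prefix):
        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
model.load_state_dict(state_dict)
model.eval()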
@bigsnarfdude
bigsnarfdude / perplexity.ipynb
Created April 13, 2024 18:28
perplexity.ipynb
@bigsnarfdude
bigsnarfdude / testing_perplexity.py
Last active April 13, 2024 17:45
testing_perplexity.py
# https://huggingface.co/docs/transformers/perplexity
import datasets
import numpy as np
import torch
from torch.nn import CrossEntropyLoss
from transformers import AutoModelForCausalLM, AutoTokenizer
import evaluate
from evaluate import logging
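These are the imports for the Hugging Face perplexity recipe linked above. A minimal, self-contained version of that computation (the model name and sample text are placeholders; for long inputs the linked doc uses a sliding window rather than one forward pass):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained('gpt2')
tokenizer = AutoTokenizer.from_pretrained('gpt2')
model.eval()

text = 'The quick brown fox jumps over the lazy dog.'
enc = tokenizer(text, return_tensors='pt')

with torch.no_grad():
    # with labels == input_ids the model shifts targets internally and
    # returns the mean cross-entropy over the predicted tokens
    out = model(enc.input_ids, labels=enc.input_ids)

ppl = torch.exp(out.loss)  # perplexity = exp(mean NLL)
print(f'perplexity: {ppl.item():.2f}')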
@bigsnarfdude
bigsnarfdude / training_data_bin_builder.py
Last active April 11, 2024 22:45
training_data_bin_builder.py
import numpy as np
import glob
import os
def read_directory(directory_path):
    file_paths = glob.glob(os.path.join(directory_path, '*.bin'))
    file_paths = [file for file in file_paths if os.path.basename(file) != 'train.bin']
    return file_paths

def batch_group_files(file_list, batch_size):
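    # The gist preview cuts off here. The body below is a guess at a minimal
    # implementation, not the gist's actual code: yield the file list in
    # consecutive chunks of batch_size.
    for i in range(0, len(file_list), batch_size):
        yield file_list[i:i + batch_size]
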
#!/bin/bash
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
git lfs install
git clone https://huggingface.co/datasets/cerebras/SlimPajama-627B
pip install datasets
@bigsnarfdude
bigsnarfdude / prepare.py
Created April 6, 2024 17:08
SlimPajama dataset into numpy memmaps
import os
import json
import time
import glob
import jsonlines
import tiktoken
import numpy as np
import zstandard as zstd
from tqdm import tqdm
from io import StringIO
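A minimal sketch of the pipeline these imports suggest: stream one SlimPajama .jsonl.zst shard, tokenize with tiktoken's GPT-2 encoding, and write the token ids into a numpy memmap. The 'text' field and uint16 dtype are assumptions (consistent with SlimPajama's schema and GPT-2's ~50k vocab), and the paths are placeholders:

import io
import json
import numpy as np
import tiktoken
import zstandard as zstd

enc = tiktoken.get_encoding('gpt2')

def tokenize_shard(shard_path, out_path):
    # decompress the shard and collect GPT-2 token ids, one document at a time
    tokens = []
    with open(shard_path, 'rb') as f:
        reader = zstd.ZstdDecompressor().stream_reader(f)
        for line in io.TextIOWrapper(reader, encoding='utf-8'):
            doc = json.loads(line)
            ids = enc.encode_ordinary(doc['text'])
            ids.append(enc.eot_token)  # end-of-text separator between documents
            tokens.extend(ids)
    # write the ids to disk as a uint16 memmap (GPT-2 ids fit in 16 bits)
    arr = np.memmap(out_path, dtype=np.uint16, mode='w+', shape=(len(tokens),))
    arr[:] = tokens
    arr.flush()

tokenize_shard('example_train.jsonl.zst', 'example_train.bin')  # placeholder paths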
@bigsnarfdude
bigsnarfdude / gist:de53d7d25c5c8ffb81dfa938f79c5df0
Last active April 4, 2024 01:32
download SlimPajama and Falcon
Size     Training Tokens  Layers  Hidden Size  Attention Heads  Context Length
OLMo 1B  3 Trillion       16      2048         16               2048
https://huggingface.co/allenai/OLMo-1B
git lfs install
git clone https://huggingface.co/datasets/cerebras/SlimPajama-627B
git lfs install
git clone https://huggingface.co/datasets/tiiuae/falcon-refinedweb
@bigsnarfdude
bigsnarfdude / estimates.ipynb
Last active April 2, 2024 00:35
gpt2 or tinyllama: estimated A100 time needed to train the model: 43.31 days
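The notebook itself doesn't render here, but figures like 43.31 days are what the standard 6*N*D FLOPs rule of thumb produces. A sketch of that estimate; the parameter count, token count, GPU count, and MFU below are illustrative values, not the notebook's:

def train_days(n_params, n_tokens, n_gpus=8, peak_flops=312e12, mfu=0.4):
    # total training compute ~ 6 * params * tokens (forward + backward)
    total_flops = 6 * n_params * n_tokens
    # sustained throughput: per-GPU bf16 peak (A100: 312 TFLOPs) times MFU
    flops_per_sec = n_gpus * peak_flops * mfu
    return total_flops / flops_per_sec / 86400

# e.g. a 1.1B-param model on 300B tokens across 8 A100s -> ~23 days
print(f'{train_days(1.1e9, 300e9):.2f} days')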
@bigsnarfdude
bigsnarfdude / gist:f8ee49ba6165e815facf187fdf9f783e
Last active April 3, 2024 17:23
tinyLlama but just using gpt2-large experiments
Phi-2
> 24 layers
250 Billion Tokens
96 X A100
14 Days
Model Name   nparams  nlayers  dmodel  nheads  dhead  Batch Size  Learning Rate
GPT-3 Small  125M     12       768     12      64     0.5M        6.0 × 10^-4
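Those GPT-3 Small dimensions map directly onto nanoGPT's GPTConfig (the same class imported in output_check.py above). block_size and vocab_size are my additions, since the table doesn't give them:

from model import GPTConfig  # nanoGPT

config = GPTConfig(
    n_layer=12,   # nlayers from the table
    n_head=12,    # nheads
    n_embd=768,   # dmodel (dhead = 768 / 12 = 64, matching the table)
    block_size=2048,   # assumed: GPT-3 used a 2048-token context
    vocab_size=50304,  # assumed: nanoGPT's padded GPT-2 vocab
)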
@bigsnarfdude
bigsnarfdude / lambda_processing_h100.py
Created March 30, 2024 19:39
lambda_processing_h100.py
import os
from openai import OpenAI
import prompt
def truncate_words(input_string, max_words):
    words = input_string.split()
    truncated_words = words[:max_words]
    return ' '.join(truncated_words)
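A hypothetical way these pieces fit together: truncate a long input with truncate_words, then send it through the OpenAI client (the model name and file path are placeholders; the client reads OPENAI_API_KEY from the environment):

client = OpenAI()
text = truncate_words(open('transcript.txt').read(), 3000)
resp = client.chat.completions.create(
    model='gpt-4o-mini',
    messages=[{'role': 'user', 'content': f'Summarize:\n{text}'}],
)
print(resp.choices[0].message.content)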