This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pickle | |
from contextlib import nullcontext | |
import torch | |
import tiktoken | |
from model import GPTConfig, GPT | |
import datasets | |
import numpy as np |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://huggingface.co/docs/transformers/perplexity | |
import datasets | |
import numpy as np | |
import torch | |
from torch.nn import CrossEntropyLoss | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import evaluate | |
from evaluate import logging |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import glob | |
import os | |
def read_directory(directory_path):
    """Return the paths of all '*.bin' files in *directory_path*, excluding 'train.bin'.

    Only the basename is compared, so a 'train.bin' in any scanned
    directory is always filtered out regardless of its full path.
    """
    selected = []
    for candidate in glob.glob(os.path.join(directory_path, '*.bin')):
        # Skip the training shard; everything else is kept.
        if os.path.basename(candidate) != 'train.bin':
            selected.append(candidate)
    return selected
def batch_group_files(file_list, batch_size): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash | |
git lfs install | |
git clone https://huggingface.co/datasets/cerebras/SlimPajama-627B | |
pip install datasets | |
#!/bin/bash |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import time | |
import glob | |
import jsonlines | |
import tiktoken | |
import numpy as np | |
import zstandard as zstd | |
from tqdm import tqdm | |
from io import StringIO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Size Training Tokens Layers Hidden Size Attention Heads Context Length | |
OLMo 1B 3 Trillion 16 2048 16 2048 | |
https://huggingface.co/allenai/OLMo-1B | |
git lfs install | |
git clone https://huggingface.co/datasets/cerebras/SlimPajama-627B | |
git lfs install | |
git clone https://huggingface.co/datasets/tiiuae/falcon-refinedweb |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Phi-2 | |
> 24 layers | |
250 Billion Tokens | |
96 X A100 | |
14 Days | |
Model Name nparams nlayers dmodel nheads dhead Batch Size Learning Rate | |
GPT-3 Small 125M 12 768 12 64 0.5M 6.0 × 10−4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from openai import OpenAI | |
import prompt | |
def truncate_words(input_string, max_words):
    """Return *input_string* reduced to its first *max_words* whitespace-separated words.

    Splitting collapses runs of whitespace, so the result is re-joined
    with single spaces regardless of the original spacing.
    """
    pieces = input_string.split()
    # Dropping the tail slice in place is equivalent to keeping pieces[:max_words].
    del pieces[max_words:]
    return ' '.join(pieces)