Bootstrap knowledge of LLMs as fast as possible, with a bias toward GPT.
Avoid being a link dump; provide only valuable, well-tuned information.
Neural-network material comes first, before the transformer content.
""" | |
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy) | |
BSD License | |
""" | |
import numpy as np | |
# data I/O | |
data = open('input.txt', 'r').read() # should be simple plain text file | |
chars = list(set(data)) | |
data_size, vocab_size = len(data), len(chars) |
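
The excerpt stops right after computing the vocabulary. For orientation, here is a short sketch of what comes next in the gist: lookup tables between characters and integer indices, plus the hyperparameters and weight matrices (values shown match the gist's defaults; treat them as illustrative):

# Lookup tables so each character can be one-hot encoded as a vocab_size vector.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

# Hyperparameters (the gist's defaults).
hidden_size = 100   # size of the hidden state
seq_length = 25     # unroll the RNN for this many steps
learning_rate = 1e-1

# Model parameters, initialized small and random.
Wxh = np.random.randn(hidden_size, vocab_size) * 0.01   # input -> hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden -> hidden
Why = np.random.randn(vocab_size, hidden_size) * 0.01   # hidden -> output
bh = np.zeros((hidden_size, 1))  # hidden bias
by = np.zeros((vocab_size, 1))   # output bias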
#!/bin/bash
iatest=$(expr index "$-" i)  # non-zero when this shell is interactive ("i" appears in $-)
#######################################################
# SOURCED ALIASES AND SCRIPTS BY zachbrowne.me
#######################################################
# Source global definitions
if [ -f /etc/bashrc ]; then
	. /etc/bashrc
fi
L1 cache reference ......................... 0.5 ns
Branch mispredict ............................ 5 ns
L2 cache reference ........................... 7 ns
Mutex lock/unlock ........................... 25 ns
Main memory reference ...................... 100 ns
Compress 1K bytes with Zippy ............. 3,000 ns = 3 µs
Send 2K bytes over 1 Gbps network ....... 20,000 ns = 20 µs
SSD random read ........................ 150,000 ns = 150 µs
Read 1 MB sequentially from memory ..... 250,000 ns = 250 µs
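
These numbers compose by simple scaling. A quick back-of-the-envelope sketch in Python, using only figures from the table above:

ns = 1e-9
read_1mb = 250_000 * ns  # 250 µs to read 1 MB sequentially from memory
send_2kb = 20_000 * ns   # 20 µs to send 2 KB over a 1 Gbps network

print(f"1 GB sequential memory read: {1024 * read_1mb:.3f} s")  # ~0.256 s
print(f"1 MB over 1 Gbps: {512 * send_2kb * 1e3:.1f} ms")       # ~10.2 ms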
I wrote this answer on Stack Exchange, here: https://stackoverflow.com/posts/12597919/
It was wrongly deleted years later for containing "proprietary information". I think that's bullshit, so I am posting it here. Come at me.
Amazon is an SOA system with hundreds of services (or so says Amazon CTO Werner Vogels). How do they handle build and release?
""" | |
Create train, valid, test iterators for CIFAR-10 [1]. | |
Easily extended to MNIST, CIFAR-100 and Imagenet. | |
[1]: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/4 | |
""" | |
import torch | |
import numpy as np |
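
The docstring promises train/valid iterators; the core pattern is to split the training set's indices and hand each half to a SubsetRandomSampler. A minimal sketch, assuming torchvision is installed (batch size and split fraction are illustrative; the full gist adds normalization, augmentation, and a test loader):

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, SubsetRandomSampler

transform = transforms.ToTensor()
train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

num_train = len(train_set)
split = int(0.1 * num_train)            # hold out 10% for validation
indices = np.random.permutation(num_train)
valid_idx, train_idx = indices[:split], indices[split:]

train_loader = DataLoader(train_set, batch_size=64, sampler=SubsetRandomSampler(train_idx))
valid_loader = DataLoader(train_set, batch_size=64, sampler=SubsetRandomSampler(valid_idx))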
# Copyright (c) 2019-present, Thomas Wolf.
# All rights reserved. This source code is licensed under the MIT-style license.
""" A very small and self-contained gist to train a GPT-2 transformer model on wikitext-103 """
import os
from collections import namedtuple
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from ignite.engine import Engine, Events
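
The gist builds its training loop on ignite's Engine abstraction. A minimal, self-contained sketch of that loop shape, with toy stand-ins (a linear model and random tensors) instead of the gist's GPT-2 and wikitext-103:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from ignite.engine import Engine, Events

# Toy stand-ins so the loop runs end to end; not the gist's model or data.
model = nn.Linear(16, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
loader = DataLoader(TensorDataset(torch.randn(256, 16), torch.randn(256, 1)), batch_size=32)

def update(engine, batch):
    # ignite calls this once per batch; whatever it returns lands in engine.state.output.
    model.train()
    x, y = batch
    loss = nn.functional.mse_loss(model(x), y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

trainer = Engine(update)

@trainer.on(Events.EPOCH_COMPLETED)
def log(engine):
    print(f"epoch {engine.state.epoch}: last loss {engine.state.output:.4f}")

trainer.run(loader, max_epochs=2)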
# If the input image is in range 0..1, first multiply img by 255.
# Assumes img is an ndarray of shape [height, width, channels] where channels is 1, 3 or 4.
def imshow(img):
    import cv2
    import IPython
    ok, encoded = cv2.imencode('.jpg', img)  # encode to an in-memory JPEG buffer
    i = IPython.display.Image(data=encoded)
    IPython.display.display(i)
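
Example usage in a notebook cell (the file name is illustrative); OpenCV reads images as BGR uint8 ndarrays, which is exactly what imencode expects:

import cv2
img = cv2.imread('photo.jpg')  # hypothetical file; loaded as BGR uint8
imshow(img)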
import torch
from torch.utils.flop_counter import FlopCounterMode
from triton.testing import do_bench

def get_flops_achieved(f):
    # Count the FLOPs one call performs, then measure wall-clock time per call.
    flop_counter = FlopCounterMode(display=False)
    with flop_counter:
        f()
    total_flops = flop_counter.get_total_flops()
    ms_per_iter = do_bench(f)
    iters_per_second = 1e3 / ms_per_iter
    print(f"{iters_per_second * total_flops / 1e12} TF/s")
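
A usage sketch, assuming a CUDA machine (Triton's do_bench benchmarks on GPU); the model and shapes are illustrative, not part of the snippet:

from torchvision.models import resnet18
model = resnet18().cuda().half()
inp = torch.randn(128, 3, 224, 224, device='cuda', dtype=torch.half)
# Measure achieved throughput of a forward+backward pass.
get_flops_achieved(lambda: model(inp).sum().backward())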