Shital Shah sytelus

## min-char-rnn.py
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np

# data I/O
# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the text has been read instead of leaving it open until
# the interpreter happens to collect it.
with open('input.txt', 'r') as corpus_file:
    data = corpus_file.read()  # should be a simple plain text file
chars = list(set(data))  # distinct characters in the corpus; set order is arbitrary
data_size, vocab_size = len(data), len(chars)

## LLM.md

      
              2 files
            
          
              156 forks
            
          
              13 comments
            
          
              1598 stars
            
          
                rain-1
                / LLM.md
            
            
              Last active
              May 11, 2024 17:17
            
              
                LLM Introduction: Learn Language Models
              
          
    Purpose

Bootstrap knowledge of LLMs ASAP, with a bias/focus toward GPT.
Avoid being a link dump. Try to provide only valuable, well-tuned information.
Prelude

Neural network links before starting with transformers.

  
## The Ultimate Bad Ass .bashrc File
#!/bin/bash
iatest=$(expr index "$-" i)

#######################################################
# SOURCED ALIASES AND SCRIPTS BY zachbrowne.me
#######################################################

# Source global definitions
if [ -f /etc/bashrc ]; then
	 . /etc/bashrc

## latency.markdown

      
              2 files
            
          
              742 forks
            
          
              50 comments
            
          
              4391 stars
            
          
                hellerbarde
                / latency.markdown
            
            
              Created
              May 31, 2012 13:16
                — forked from jboner/latency.txt
            
              
                Latency numbers every programmer should know
              
          
    Latency numbers every programmer should know

L1 cache reference ......................... 0.5 ns
Branch mispredict ............................ 5 ns
L2 cache reference ........................... 7 ns
Mutex lock/unlock ........................... 25 ns
Main memory reference ...................... 100 ns             
Compress 1K bytes with Zippy ............. 3,000 ns  =   3 µs
Send 2K bytes over 1 Gbps network ....... 20,000 ns  =  20 µs
SSD random read ........................ 150,000 ns  = 150 µs

Read 1 MB sequentially from memory ..... 250,000 ns = 250 µs

  
## normcore-llm.md

      
              1 file
            
          
              208 forks
            
          
              38 comments
            
          
              2716 stars
            
          
                veekaybee
                / normcore-llm.md
            
            
              Last active
              May 9, 2024 07:47
            
              
                Normcore LLM Reads
              
          
    Anti-hype LLM reading list

Goals: Add links that are reasonable and good explanations of how stuff works. No hype and no vendor content if possible. Practical first-hand accounts of models in prod eagerly sought.
Foundational Concepts


Pre-Transformer Models


## amazon.md

      
              1 file
            
          
              37 forks
            
          
              20 comments
            
          
              267 stars
            
          
                terabyte
                / amazon.md
            
            
              Created
              December 6, 2017 02:27
            
              
                Amazon's Build System
              
          
    Prologue

I wrote this answer on stackexchange, here:
https://stackoverflow.com/posts/12597919/
It was wrongly deleted for containing "proprietary information" years later.  I think that's bullshit so I am posting it here.  Come at me.
The Question

Amazon is a SOA system with 100s of services (or so says Amazon Chief Technology Officer Werner Vogels). How do they handle build and release?

  
## data_loader.py
"""
Create train, valid, test iterators for CIFAR-10 [1].
Easily extended to MNIST, CIFAR-100 and Imagenet.

[1]: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/4
"""

import torch
import numpy as np

## gpt-2-wikitext-103.py
# Copyright (c) 2019-present, Thomas Wolf.
# All rights reserved. This source code is licensed under the MIT-style license.
""" A very small and self-contained gist to train a GPT-2 transformer model on wikitext-103 """
import os
from collections import namedtuple
from tqdm import tqdm
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from ignite.engine import Engine, Events

## ipython_display.py
# if input image is in range 0..1, please first multiply img by 255
# assume image is ndarray of shape [height, width, channels] where channels can be 1, 3 or 4
def imshow(img):
    """Display an image inline in an IPython/Jupyter frontend as a JPEG.

    Args:
        img: Image array handed to ``cv2.imencode``. Expected to be an
            ndarray of shape [height, width, channels] (channels 1, 3 or 4)
            with values in 0..255; multiply a 0..1 image by 255 first.

    Raises:
        ValueError: If OpenCV reports that the JPEG encoding failed.
    """
    # Imported lazily so the module can be loaded without OpenCV/IPython.
    import cv2
    from IPython.display import Image, display

    # imencode returns (success_flag, encoded_buffer); check the flag rather
    # than discarding it, so a failed encode is not silently displayed.
    ok, encoded = cv2.imencode('.jpg', img)
    if not ok:
        raise ValueError("cv2.imencode could not encode the image as JPEG")

    # Hand IPython real bytes (not the ndarray buffer) so it receives the
    # encoded JPEG stream in the form its Image class documents.
    display(Image(data=encoded.tobytes()))

## mfu_compute.py
import torch
from torch.utils.flop_counter import FlopCounterMode
from triton.testing import do_bench

def get_flops_achieved(f):
    """Count the FLOPs of one call to ``f`` and then benchmark ``f``.

    ``f`` is invoked with no arguments, once under ``FlopCounterMode`` and
    again (by ``do_bench``) for timing.

    NOTE(review): as excerpted here the body ends after the benchmark and
    nothing is returned or printed; the rest of the function appears to be
    missing from this view — confirm against the full source.
    """
    flop_counter = FlopCounterMode(display=False)
    # Run f once inside the counter context so its operations are tallied.
    with flop_counter:
        f()
    total_flops = flop_counter.get_total_flops()
    # Presumably milliseconds per call, going by the variable name —
    # TODO confirm against triton.testing.do_bench's documented return value.
    ms_per_iter = do_bench(f)
	"""
	Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
	BSD License
	"""
	import numpy as np

	# data I/O
	data = open('input.txt', 'r').read() # should be simple plain text file
	chars = list(set(data))
	data_size, vocab_size = len(data), len(chars)
	#!/bin/bash
	iatest=$(expr index "$-" i)

	#######################################################
	# SOURCED ALIAS'S AND SCRIPTS BY zachbrowne.me
	#######################################################

	# Source global definitions
	if [ -f /etc/bashrc ]; then
	. /etc/bashrc
	"""
	Create train, valid, test iterators for CIFAR-10 [1].
	Easily extended to MNIST, CIFAR-100 and Imagenet.

	[1]: https://discuss.pytorch.org/t/feedback-on-pytorch-for-kaggle-competitions/2252/4
	"""

	import torch
	import numpy as np
	# Copyright (c) 2019-present, Thomas Wolf.
	# All rights reserved. This source code is licensed under the MIT-style license.
	""" A very small and self-contained gist to train a GPT-2 transformer model on wikitext-103 """
	import os
	from collections import namedtuple
	from tqdm import tqdm
	import torch
	import torch.nn as nn
	from torch.utils.data import DataLoader
	from ignite.engine import Engine, Events
	# if input image is in range 0..1, please first multiply img by 255
	# assume image is ndarray of shape [height, width, channels] where channels can be 1, 3 or 4
	def imshow(img):
	import cv2
	import IPython
	_,ret = cv2.imencode('.jpg', img)
	i = IPython.display.Image(data=ret)
	IPython.display.display(i)
	import torch
	from torch.utils.flop_counter import FlopCounterMode
	from triton.testing import do_bench

	def get_flops_achieved(f):
	flop_counter = FlopCounterMode(display=False)
	with flop_counter:
	f()
	total_flops = flop_counter.get_total_flops()
	ms_per_iter = do_bench(f)