fastdaima

## static_kv_cache.py
from transformers import AutoModelForCausalLM, AutoTokenizer, StaticCache
import torch
from typing import Optional
# Target device name. NOTE(review): not referenced in the visible excerpt —
# presumably used for model/tensor placement further down; confirm.
device = "cuda"

# Copied from the gpt-fast repo
def multinomial_sample_one_no_sync(probs_sort):
    """Draw one multinomial sample per row without a CUDA synchronization.

    Each probability is divided by independent Exponential(1) noise and the
    position of the largest ratio along the last dimension is taken as the
    sampled index.

    Args:
        probs_sort: Tensor of probabilities; sampling runs over its last
            dimension.

    Returns:
        A ``torch.int`` tensor of sampled indices with the last dimension
        kept at size 1.
    """
    # Fresh noise with the same shape, dtype, and device as the input.
    noise = torch.empty_like(probs_sort).exponential_(1)
    scaled = probs_sort / noise
    picked = scaled.argmax(dim=-1, keepdim=True)
    return picked.to(dtype=torch.int)

## Ubuntu 20.04 for Deep Learning.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                fastdaima
                / Ubuntu 20.04 for Deep Learning.md
            
            
              Created
              November 9, 2023 19:28
                — forked from LingxiaoShawn/Ubuntu 20.04 for Deep Learning.md
            
          
    Ubuntu 20.04 for Deep Learning

In the name of God
This gist contains steps to set up Ubuntu 20.04 for deep learning.

Install Ubuntu 20.04:

  
## Ubuntu 22.04 for Deep Learning.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                fastdaima
                / Ubuntu 22.04 for Deep Learning.md
            
            
              Created
              November 7, 2023 17:30
                — forked from amir-saniyan/Ubuntu 22.04 for Deep Learning.md
            
          
    Ubuntu 22.04 for Deep Learning

In the name of God
This gist contains steps to set up Ubuntu 22.04 for deep learning.

Install Ubuntu 22.04


## pull-all.sh
#!/usr/bin/env bash
# Run `git pull` concurrently in every repository named in ~/git/repos
# (whitespace-separated names, each resolved relative to ~/git), then block
# until all of the pulls have finished.
for f in $(<~/git/repos); do
  # Each pull runs in its own subshell, so the directory change stays local
  # to that job. If the directory is missing, `cd` fails and the pull is
  # skipped rather than running in whatever the current directory is.
  (cd ~/git/"$f" && git pull > /dev/null) &
done

# With no operands, `wait` blocks until every background job has exited;
# it does not read stdin, so no PID list needs to be fed to it.
wait

for f in $(<~/git/repos); do
	from transformers import AutoModelForCausalLM, AutoTokenizer, StaticCache
	import torch
	from typing import Optional
	device = "cuda"

	# Copied from the gpt-fast repo
	def multinomial_sample_one_no_sync(probs_sort): # Does multinomial sampling without a cuda synchronization
	q = torch.empty_like(probs_sort).exponential_(1)
	return torch.argmax(probs_sort / q, dim=-1, keepdim=True).to(dtype=torch.int)
	#!/usr/bin/env bash
	for f in $(<~/git/repos); do
	cd ~/git/$f
	git pull > /dev/null &
	cd - > /dev/null
	done

	wait < <(jobs -p)

	for f in $(<~/git/repos); do