Andrew Ginns andrewginns

## docker-compose.yaml
# Compose service definitions; only the `meshnet` service is visible in this snippet.
services:
  meshnet:
    # NOTE(review): `latest` is a floating tag; consider pinning a version or
    # digest so redeploys are reproducible.
    image: ghcr.io/mattstechinfo/meshnet:latest
    networks:
      # Attach to the `app-meshnet` network with a fixed IPv4 address.
      # NOTE(review): `app-meshnet` has to be declared under a top-level
      # `networks:` key with a subnet containing 172.20.0.2; that declaration
      # is not visible in this snippet, so confirm it exists.
      app-meshnet:
        ipv4_address: 172.20.0.2
    # Restart the container automatically unless it was explicitly stopped.
    restart: unless-stopped
    # Extra Linux capabilities granted to the container: network
    # administration (NET_ADMIN) and raw/packet sockets (NET_RAW).
    cap_add:
      - NET_ADMIN
      - NET_RAW

## llm_perf_comparison.md

      
1 file · 0 forks · 1 comment · 0 stars

andrewginns / llm_perf_comparison.md

Last active January 12, 2024 10:06

Comparison of LLM performance on varied hardware
              
          
    Based on ggerganov/llama.cpp#4167
LLaMA 7B

PP means "prompt processing" (bs = 512), TG means "text-generation" (bs = 1), t/s means "tokens per second"


| | BW [GB/s] | GPU Cores | F16 PP [t/s] | F16 TG [t/s] | Q8_0 PP [t/s] | Q8_0 TG [t/s] | Q4_0 PP [t/s] | Q4_0 TG [t/s] |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| ✅ M1 Pro 16GB | 200 | 14 | 262.65 | 12.75 | 235.16 | 21.95 | 232.55 | 35.52 |


✅ [M3 Pro 36


## tweet_dumper.py
#!/usr/bin/env python
# encoding: utf-8

import tweepy #https://github.com/tweepy/tweepy
import csv


#Twitter API credentials
consumer_key = ""
	services:
	meshnet:
	image: ghcr.io/mattstechinfo/meshnet:latest
	networks:
	app-meshnet:
	ipv4_address: 172.20.0.2
	restart: unless-stopped
	cap_add:
	- NET_ADMIN
	- NET_RAW
	BW [GB/s]	GPU Cores	F16 PP [t/s]	F16 TG [t/s]	Q8_0 PP [t/s]	Q8_0 TG [t/s]	Q4_0 PP [t/s]	Q4_0 TG [t/s]
✅ M1 Pro 16GB	200	14	262.65	12.75	235.16	21.95	232.55	35.52
✅ [M3 Pro 36
	#!/usr/bin/env python
	# encoding: utf-8

	import tweepy #https://github.com/tweepy/tweepy
	import csv



	#Twitter API credentials
	consumer_key = ""