Alex Wang KeAWang

## ollama_fast_speech_text_speech.py
""" To use: install Ollama, clone OpenVoice, run this script in the OpenVoice directory
    brew install portaudio
    brew install git-lfs
    git lfs install

    git clone https://github.com/myshell-ai/OpenVoice
    cd OpenVoice
    git clone https://huggingface.co/myshell-ai/OpenVoice
    cp -r OpenVoice/* .


## fast_speech_text_speech.py
""" To use: install LM Studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
    git clone https://github.com/myshell-ai/OpenVoice
    cd OpenVoice
    git clone https://huggingface.co/myshell-ai/OpenVoice
    cp -r OpenVoice/* .
    pip install whisper pynput pyaudio
"""

from openai import OpenAI
import time

## gbnf_grammar_generator.py
import inspect
import json
import re
import typing
from inspect import isclass, getdoc
from types import NoneType

from pydantic import BaseModel, Field
from pydantic.fields import FieldInfo
from typing import Any, Type, List, get_args, get_origin, Tuple, Union, Optional

## normcore-llm.md

      
              1 file
            
          
              209 forks
            
          
              38 comments
            
          
              2724 stars
            
          
                veekaybee
                / normcore-llm.md
            
            
              Last active
              May 18, 2024 01:40
            
              
                Normcore LLM Reads
              
          
    Anti-hype LLM reading list

Goals: Add links that are reasonable and good explanations of how stuff works. No hype and no vendor content if possible. Practical first-hand accounts of models in prod eagerly sought.
Foundational Concepts


Pre-Transformer Models


## gpu-monitor-with-executor-and-email.py
"""
GPU Monitor with Email and Execution

This script monitors the usage of GPUs on a system and, when there are enough free GPUs, executes a specified function.
The function runs a bash script by default but could be any other executable code.
This script uses the GPUtil library to monitor GPU usage.

Preparation:
    1. `pip install GPUtil`
    2. define your own `func` if needed

## OpenAI Token Counting.ipynb

      
              1 file
            
          
              5 forks
            
          
              11 comments
            
          
              21 stars
            
          
                CGamesPlay
                / OpenAI Token Counting.ipynb
            
            
              Last active
              February 1, 2024 23:59
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## personalised_pushes.py
import weaviate
import csv
import openai

from weaviate.util import generate_uuid5, get_valid_uuid
from uuid import uuid4

OPENAI_API_KEY = "YOUR KEY"
WEAVIATE_URL = "YOUR URL"
openai.api_key = "YOUR KEY"

## llama-home.md

      
              1 file
            
          
              35 forks
            
          
              20 comments
            
          
              444 stars
            
          
                rain-1
                / llama-home.md
            
            
              Last active
              May 16, 2024 04:58
            
              
                How to run Llama 13B with a 6GB graphics card
              
          
    This worked on 14/May/23. The instructions will probably require updating in the future.

llama is a text prediction model similar to GPT-2, and to the version of GPT-3 that has not been fine-tuned yet.
It is also possible to run fine-tuned versions (like alpaca or vicuna) with this, I think. Those versions are more focused on answering questions.

Note: I have been told that this does not support multiple GPUs. It can only use a single GPU.
It is possible to run Llama 13B with a 6GB graphics card now! (e.g. an RTX 2060). Thanks to the amazing work involved in llama.cpp. The latest change is CUDA/cuBLAS, which allows you to pick an arbitrary number of the transformer layers to be run on the GPU. This is perfect for low VRAM.

Clone llama.cpp from git; I am on commit 08737ef720f0510c7ec2aa84d7f70c691073c35d.


## whisper_jax_endpoint.py
from gradio_client import Client


API_URL = "https://sanchit-gandhi-whisper-jax.hf.space/"

# set up the Gradio client
client = Client(API_URL)


def transcribe_audio(audio_path, task="transcribe", return_timestamps=False):

## mfu_compute.py
import torch
from torch.utils.flop_counter import FlopCounterMode
from triton.testing import do_bench

def get_flops_achieved(f):
    flop_counter = FlopCounterMode(display=False)
    with flop_counter:
        f()
    total_flops = flop_counter.get_total_flops()
    ms_per_iter = do_bench(f)
	""" To use: install Ollama, clone OpenVoice, run this script in the OpenVoice directory
	brew install portaudio
	brew install git-lfs
	git lfs install

	git clone https://github.com/myshell-ai/OpenVoice
	cd OpenVoice
	git clone https://huggingface.co/myshell-ai/OpenVoice
	cp -r OpenVoice/* .
	""" To use: install LM Studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory
	git clone https://github.com/myshell-ai/OpenVoice
	cd OpenVoice
	git clone https://huggingface.co/myshell-ai/OpenVoice
	cp -r OpenVoice/* .
	pip install whisper pynput pyaudio
	"""

	from openai import OpenAI
	import time
	import inspect
	import json
	import re
	import typing
	from inspect import isclass, getdoc
	from types import NoneType

	from pydantic import BaseModel, Field
	from pydantic.fields import FieldInfo
	from typing import Any, Type, List, get_args, get_origin, Tuple, Union, Optional
	"""
	GPU Monitor with Email and Execution

	This script monitors the usage of GPUs on a system and, when there are enough free GPUs, executes a specified function.
	The function runs a bash script by default but could be any other executable code.
	This script uses the GPUtil library to monitor GPU usage.

	Preparation:
	1. `pip install GPUtil`
	2. define your own `func` if needed
	import weaviate
	import csv
	import openai

	from weaviate.util import generate_uuid5, get_valid_uuid
	from uuid import uuid4

	OPENAI_API_KEY = "YOUR KEY"
	WEAVIATE_URL = "YOUR URL"
	openai.api_key = "YOUR KEY"
	from gradio_client import Client


	API_URL = "https://sanchit-gandhi-whisper-jax.hf.space/"

	# set up the Gradio client
	client = Client(API_URL)


	def transcribe_audio(audio_path, task="transcribe", return_timestamps=False):
	import torch
	from torch.utils.flop_counter import FlopCounterMode
	from triton.testing import do_bench

	def get_flops_achieved(f):
	flop_counter = FlopCounterMode(display=False)
	with flop_counter:
	f()
	total_flops = flop_counter.get_total_flops()
	ms_per_iter = do_bench(f)