Goals: Add links that are reasonable and good explanations of how stuff works. No hype and no vendor content if possible. Practical first-hand accounts of models in prod eagerly sought.
![Screenshot 2023-12-18 at 10 40 27 PM](https://private-user-images.githubusercontent.com/3837836/291468646-4c30ad72-76ee-4939-a5fb-16b570d38cf2.png?jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbSIsImtleSI6ImtleTUiLCJleHAiOjE3MTk2MzI3ODMsIm5iZiI6MTcxOTYzMjQ4MywicGF0aCI6Ii8zODM3ODM2LzI5MTQ2ODY0Ni00YzMwYWQ3Mi03NmVlLTQ5MzktYTVmYi0xNmI1NzBkMzhjZjIucG5nP1gtQW16LUFsZ29yaXRobT1BV1M0LUhNQUMtU0hBMjU2JlgtQW16LUNyZWRlbnRpYWw9QUtJQVZDT0RZTFNBNTNQUUs0WkElMkYyMDI0MDYyOSUyRnVzLWVhc3QtMSUyRnMzJTJGYXdzNF9yZXF1ZXN0JlgtQW16LURhdGU9MjAyNDA2MjlUMDM0MTIzWiZYLUFtei1FeHBpcmVzPTMwMCZYLUFtei1TaWduYXR1cmU9MDZjMjljMTFjZTY1OTc4YjI3MzhkNDllY2FmYjM3OTZjN2JmYWJiMTliMTQ5ZGEyMDEyOTM0MmY4MWY3ODUwZiZYLUFtei1TaWduZWRIZWFkZXJzPWhvc3QmYWN0b3JfaWQ9MCZrZXlfaWQ9MCZyZXBvX2lkPTAifQ.sg-8gAQQh-DJLVLvjMUEZigAXFBqKVRzZKYJR-bZtDY)
""" To use: install LLM studio (or Ollama), clone OpenVoice, run this script in the OpenVoice directory | |
git clone https://github.com/myshell-ai/OpenVoice | |
cd OpenVoice | |
git clone https://huggingface.co/myshell-ai/OpenVoice | |
cp -r OpenVoice/* . | |
pip install openai-whisper pynput pyaudio | |
""" | |
from openai import OpenAI | |
import time |
Let's say we're trying to load a LLaMA model via AutoModelForCausalLM.from_pretrained
with 4-bit quantization in order to run inference with it:
python generate.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, LlamaTokenizerFast, LlamaForCausalLM
import transformers
`nvidia-smi` said this required 11181MiB, at least to train on the prompt sequence lengths that occurred initially in the alpaca dataset (~337-token prompts).
You can get this down to about 10.9GB if (by modifying qlora.py) you run torch.cuda.empty_cache()
after PEFT has been applied to your loaded model and before you begin training.
All instructions are written assuming your command-line shell is bash.
Clone repository:
#This module is meant for direct use only. For API-usage please check SDA-TRAINER. | |
#Based off NVIDIA's demo | |
import argparse | |
from threads.trt.models import CLIP, UNet, VAE | |
import os | |
import onnx | |
import torch | |
from diffusers import UNet2DConditionModel, AutoencoderKL | |
from transformers import CLIPTextModel | |
from threads.trt.utilities import Engine |
from huggingface_hub import hf_hub_download | |
from flax.serialization import msgpack_restore | |
from safetensors.flax import save_file | |
import numpy as np | |
filename = hf_hub_download("gpt2", filename="flax_model.msgpack") | |
with open(filename, "rb") as f: | |
data = f.read() | |
flax_weights = msgpack_restore(data) |
# %% | |
import replicate | |
model = replicate.models.get("prompthero/openjourney") | |
version = model.versions.get("9936c2001faa2194a261c01381f90e65261879985476014a0a37a334593a05eb") | |
PROMPT = "mdjrny-v4 style 360 degree equirectangular panorama photograph, Alps, giant mountains, meadows, rivers, rolling hills, trending on artstation, cinematic composition, beautiful lighting, hyper detailed, 8 k, photo, photography" | |
output = version.predict(prompt=PROMPT, width=1024, height=512) | |
# %% | |
# download the image from the url at output[0] | |
import requests |
# Got a bunch of .ckpt files to convert? | |
# Here's a handy script to take care of all that for you! | |
# Original .ckpt files are not touched! | |
# Make sure you have enough disk space! You are going to DOUBLE the size of your models folder! | |
# | |
# First, run: | |
# pip install torch torchsde==0.2.5 safetensors==0.2.5 | |
# | |
# Place this file in the **SAME DIRECTORY** as all of your .ckpt files, open a command prompt for that folder, and run: | |
# python convert_to_safe.py |
import mmap | |
import torch | |
import json | |
import os | |
from huggingface_hub import hf_hub_download | |
def load_file(filename, device): | |
with open(filename, mode="r", encoding="utf8") as file_obj: | |
with mmap.mmap(file_obj.fileno(), length=0, access=mmap.ACCESS_READ) as m: |
An image generated at resolution 512x512 then upscaled to 1024x1024 with Waifu Diffusion 1.3 Epoch 7.