# Create an isolated conda environment for the Llama 2 demo (Python 3.9).
conda create -n llama2 python=3.9
# NOTE(review): 'conda activate' only works after 'conda init' has configured this shell — confirm.
conda activate llama2
# langchain
pip install langchain
#!/usr/bin/env python3
# Dependencies
# =============================
# pip install nltk transformers
import argparse
import sys
from pathlib import Path
# Clone llama.cpp
git clone https://github.com/ggerganov/llama.cpp.git
# Abort if the clone failed or the directory is missing.
cd llama.cpp || exit 1
# Build it with Metal (Apple GPU) acceleration enabled.
make clean
LLAMA_METAL=1 make
# Model file to use (download separately); read by the run commands below.
export MODEL=llama-2-13b-chat.ggmlv3.q4_0.bin
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
👉 This setup is for testing purposes only. It uses Docker, will be slow, and may crash. I suggest you read the FAQ and the Docker Prerequisites sections before jumping into this.
![]() |
---|
Stable Diffusion Prompt: A beautifully colored cat sitting in the clouds with a rainbow in the background, in the style of Andy Warhol |
Like this? ☕
import { z } from "zod";
import { zodToTs, printNode } from "zod-to-ts";
// Replace with your `openai` client import
import { openai } from "../openai.server";
import endent from "endent";
function createJSONCompletion<T extends z.ZodType>({
  prompt,
  schema_name,
The problem with large language models is that you can’t run them locally on your laptop. Thanks to Georgi Gerganov and his llama.cpp project, it is now possible to run Meta’s LLaMA on a single computer without a dedicated GPU.
There are multiple steps involved in running LLaMA locally on an M1 Mac after downloading the model weights.
(venv) # Exit:0 2023-03-12 16:59:27 [r2q2@Reformer#[:~/opt/llama.cpp]
$(: !605 ) ./main -m ./models/65B/ggml-model-q4_0.bin -t 8 -n 128
main: seed = 1678658429
llama_model_load: loading model from './models/65B/ggml-model-q4_0.bin' - please wait ...
llama_model_load: n_vocab = 32000
llama_model_load: n_ctx = 512
llama_model_load: n_embd = 8192
llama_model_load: n_mult = 256
llama_model_load: n_head = 64
llama_model_load: n_layer = 80
#!/usr/bin/env bash
#######################################
# Generate a pseudo-random version-4 UUID (8-4-4-4-12 lowercase hex).
# Uses bash's $RANDOM, so the result is NOT cryptographically secure.
# Outputs: the UUID followed by a newline, on stdout.
# Returns: 0.
#######################################
uuid()
{
  local N B C='89ab'
  for (( N=0; N < 16; ++N )); do
    B=$(( RANDOM % 256 ))
    case $N in
      6)
        # Byte 6, high nibble: force the version field to '4'.
        printf '4%x' $(( B % 16 ))
        ;;
      8)
        # Byte 8, high nibble: variant field, one of 8/9/a/b.
        printf '%c%x' "${C:$(( RANDOM % ${#C} )):1}" $(( B % 16 ))
        ;;
      3 | 5 | 7 | 9)
        # Group boundaries fall after bytes 3, 5, 7 and 9.
        printf '%02x-' "$B"
        ;;
      *)
        printf '%02x' "$B"
        ;;
    esac
  done
  printf '\n'
}
# Follow the Apache error log in real time; -F re-opens the file after log rotation.
sudo tail -F /var/log/apache2/error.log