llama-cpp-python nix flake
{
  # inputs.llama-cpp.url = "github:ggerganov/llama.cpp/47068e517004d90f13c16352bb3b4cafd53a00cd";
  inputs.llama-cpp.url = "github:mausch/llama.cpp/4ec0000d119289f3545283651491b75080bd75bd";
  inputs.nixpkgs.url = "github:nixos/nixpkgs/78058d810644f5ed276804ce7ea9e82d92bee293";
  inputs.flake-utils.url = "github:numtide/flake-utils/f9e7cf818399d17d347f847525c5a5a8032e4e44";

  outputs = { self, nixpkgs, flake-utils, llama-cpp }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};

        # Example model used by apps.default below.
        model = pkgs.fetchurl {
          url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf";
          hash = "sha256-fd/if2G/mUVCwirKITxG7L2KYkzKdKv/Aqe1qMGPeH8=";
        };

        python = pkgs.python3;
        # need https://github.com/NixOS/nixpkgs/pull/244564
        # pkgs.python3.override {
        #   self = python;
        #   packageOverrides = self: super: {
        #     pydantic = super.pydantic.overrideAttrs (args: rec {
        #       version = "2.3.0";
        #       src = pkgs.fetchPypi {
        #         inherit version;
        #         pname = "pydantic";
        #         hash = "sha256-FgfMEGYCKEzUoAiCmGVwRy8ZP96csSWbzurtsmqnmm0=";
        #       };
        #       buildInputs = super.pydantic.buildInputs ++ [
        #         # super.pdm
        #       ];
        #       patches = [];
        #     });
        #     # pydantic = super.buildPythonPackage rec {
        #     #   pname = "pydantic";
        #     #   version = "2.3.0";
        #     #   format = "pyproject";
        #     #   buildInputs = [
        #     #     super.hatchling
        #     #     super.hatch-fancy-pypi-readme
        #     #   ];
        #     #   src = pkgs.fetchPypi {
        #     #     inherit pname version;
        #     #     hash = "sha256-FgfMEGYCKEzUoAiCmGVwRy8ZP96csSWbzurtsmqnmm0=";
        #     #   };
        #     # };
        #   };
        # };

        pydantic-settings = python.pkgs.buildPythonPackage rec {
          pname = "pydantic_settings";
          version = "2.0.3";
          format = "pyproject";
          src = pkgs.fetchPypi {
            inherit pname version;
            hash = "sha256-li3DZySVqtaulqQ5D6x+WTWR4URiXlES01n49n+3WUU=";
          };
          buildInputs = [
            python.pkgs.hatchling
          ];
          propagatedBuildInputs = [
            python.pkgs.pydantic
          ];
        };

        # Build llama-cpp-python against the prebuilt libllama.so from the
        # llama-cpp input instead of building the vendored submodule.
        llama-cpp-python = target: python.pkgs.buildPythonPackage rec {
          pname = "llama-cpp-python";
          version = "0.1.83";
          disabled = python.pythonOlder "3.6";
          format = "pyproject";
          # Symlink the prebuilt libllama.so where the build expects the
          # vendored llama.cpp checkout to be.
          preBuild = ''
            cd ..
            mkdir -p vendor/llama.cpp
            ln -s ${llama-cpp.packages.${system}.${target}}/lib/libllama.so vendor/llama.cpp/libllama.so
          '';
          src = pkgs.fetchFromGitHub {
            owner = "abetlen";
            repo = "llama-cpp-python";
            rev = "v${version}";
            hash = "sha256-IaE1vKqhDpVLHnRx0RcZOkcAUI4apAtNf+i9Hz8eCQI=";
          };
          buildInputs = [
            pkgs.cmake
          ];
          nativeBuildInputs = [
            llama-cpp.packages.${system}.${target}
            python.pkgs.scikit-build
            python.pkgs.ninja
            python.pkgs.cmake
          ];
          propagatedBuildInputs = [
            python.pkgs.diskcache
            python.pkgs.numpy
            python.pkgs.typing-extensions
            python.pkgs.fastapi
            python.pkgs.uvicorn
            python.pkgs.pydantic
            # pydantic-settings # need https://github.com/NixOS/nixpkgs/pull/244564
          ];
          pythonImportsCheck = [
            # can't check due to sandbox: "hwloc/linux: failed to find sysfs cpu topology directory, aborting linux discovery"
            # "llama_cpp"
          ];
        };
      in
      rec {
        packages.default = llama-cpp-python "default";
        packages.opencl = llama-cpp-python "opencl";
        packages.cuda = llama-cpp-python "cuda";
        packages.python = pkgs.python3.withPackages (ps: [
          packages.default
        ]);
        packages.python-opencl = pkgs.python3.withPackages (ps: [
          packages.opencl
        ]);
        packages.python-cuda = pkgs.python3.withPackages (ps: [
          packages.cuda
        ]);
        # requires GPU
        apps.default =
          let
            # The prompt asks, in Spanish: "You are Pablo Neruda. Write a
            # poem about the sun and the clouds."
            runpy = pkgs.writeText "run.py" ''
              from llama_cpp import Llama
              llm = Llama(model_path="${model}", n_ctx=4096, n_gpu_layers=41)
              response = llm("<s>[INST]<<SYS>>Eres Pablo Neruda <</SYS>> Escribe un poema sobre el sol y las nubes[/INST]", stop=["</s>"], max_tokens=9999)
              print(response)
            '';
            start = pkgs.writeShellScript "start.sh" ''
              ${packages.python-opencl}/bin/python ${runpy}
            '';
          in {
            type = "app";
            program = "${start}";
          };
      });
}
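
To consume this flake from another flake, a minimal sketch (the system and the dev-shell output here are illustrative; the gist URL is the one used in the commands below):

{
  inputs.nixpkgs.url = "github:nixos/nixpkgs/78058d810644f5ed276804ce7ea9e82d92bee293";
  inputs.llama-cpp-python.url = "git+https://gist.github.com/f525326eb70528f04b626fb9e01fcb58.git";

  outputs = { self, nixpkgs, llama-cpp-python }:
    let pkgs = nixpkgs.legacyPackages.x86_64-linux; in {
      # Dev shell whose python3 can import llama_cpp.
      devShells.x86_64-linux.default = pkgs.mkShell {
        packages = [ llama-cpp-python.packages.x86_64-linux.python ];
      };
    };
}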
mausch commented Sep 10, 2023

For example, to get a Python interpreter with llama-cpp-python available:

$ nix shell --refresh --no-write-lock-file git+https://gist.github.com/f525326eb70528f04b626fb9e01fcb58.git#python
$ python
>>> from llama_cpp import Llama
>>> llm = Llama(model_path="<PATH_TO_MODEL>")
>>> llm("my name is ")

Or, to run an example Llama 2 prompt on a GPU machine with OpenCL:

nix run --refresh --no-write-lock-file git+https://gist.github.com/f525326eb70528f04b626fb9e01fcb58.git

NixOS/nixpkgs#244564 is needed to build the OpenAI-compatible server (which is why pydantic-settings is commented out in the flake above).
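
Once that lands, starting the server should look roughly like this (llama_cpp.server and its --model flag come from upstream llama-cpp-python; the port and the request below are illustrative):

$ python -m llama_cpp.server --model <PATH_TO_MODEL>
$ curl http://localhost:8000/v1/completions \
    -H 'Content-Type: application/json' \
    -d '{"prompt": "my name is ", "max_tokens": 16}'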
