llama-cpp-python nix flake
{
  # inputs.llama-cpp.url = "github:ggerganov/llama.cpp/47068e517004d90f13c16352bb3b4cafd53a00cd";
  inputs.llama-cpp.url = "github:mausch/llama.cpp/4ec0000d119289f3545283651491b75080bd75bd";
  inputs.nixpkgs.url = "github:nixos/nixpkgs/78058d810644f5ed276804ce7ea9e82d92bee293";
  inputs.flake-utils.url = "github:numtide/flake-utils/f9e7cf818399d17d347f847525c5a5a8032e4e44";

  outputs = { self, nixpkgs, flake-utils, llama-cpp }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};

        # Example model used by apps.default below.
        model = pkgs.fetchurl {
          url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_K_M.gguf";
          hash = "sha256-fd/if2G/mUVCwirKITxG7L2KYkzKdKv/Aqe1qMGPeH8=";
        };

        python = pkgs.python3;
        # need https://github.com/NixOS/nixpkgs/pull/244564
        # pkgs.python3.override {
        #   self = python;
        #   packageOverrides = self: super: {
        #     pydantic = super.pydantic.overrideAttrs (args: rec {
        #       version = "2.3.0";
        #       src = pkgs.fetchPypi {
        #         inherit version;
        #         pname = "pydantic";
        #         hash = "sha256-FgfMEGYCKEzUoAiCmGVwRy8ZP96csSWbzurtsmqnmm0=";
        #       };
        #       buildInputs = super.pydantic.buildInputs ++ [
        #         # super.pdm
        #       ];
        #       patches = [];
        #     });
        #     # pydantic = super.buildPythonPackage rec {
        #     #   pname = "pydantic";
        #     #   version = "2.3.0";
        #     #   format = "pyproject";
        #     #   buildInputs = [
        #     #     super.hatchling
        #     #     super.hatch-fancy-pypi-readme
        #     #   ];
        #     #   src = pkgs.fetchPypi {
        #     #     inherit pname version;
        #     #     hash = "sha256-FgfMEGYCKEzUoAiCmGVwRy8ZP96csSWbzurtsmqnmm0=";
        #     #   };
        #     # };
        #   };
        # };

        pydantic-settings = python.pkgs.buildPythonPackage rec {
          pname = "pydantic_settings";
          version = "2.0.3";
          format = "pyproject";
          src = pkgs.fetchPypi {
            inherit pname version;
            hash = "sha256-li3DZySVqtaulqQ5D6x+WTWR4URiXlES01n49n+3WUU=";
          };
          buildInputs = [
            python.pkgs.hatchling
          ];
          propagatedBuildInputs = [
            python.pkgs.pydantic
          ];
        };

        # Build llama-cpp-python against the prebuilt libllama.so from the
        # llama-cpp input instead of building the vendored submodule.
        llama-cpp-python = target: python.pkgs.buildPythonPackage rec {
          pname = "llama-cpp-python";
          version = "0.1.83";
          disabled = python.pythonOlder "3.6";
          format = "pyproject";
          # Symlink the prebuilt libllama.so where the build expects the
          # vendored llama.cpp checkout to be.
          preBuild = ''
            cd ..
            mkdir -p vendor/llama.cpp
            ln -s ${llama-cpp.packages.${system}.${target}}/lib/libllama.so vendor/llama.cpp/libllama.so
          '';
          src = pkgs.fetchFromGitHub {
            owner = "abetlen";
            repo = "llama-cpp-python";
            rev = "v${version}";
            hash = "sha256-IaE1vKqhDpVLHnRx0RcZOkcAUI4apAtNf+i9Hz8eCQI=";
          };
          buildInputs = [
            pkgs.cmake
          ];
          nativeBuildInputs = [
            llama-cpp.packages.${system}.${target}
            python.pkgs.scikit-build
            python.pkgs.ninja
            python.pkgs.cmake
          ];
          propagatedBuildInputs = [
            python.pkgs.diskcache
            python.pkgs.numpy
            python.pkgs.typing-extensions
            python.pkgs.fastapi
            python.pkgs.uvicorn
            python.pkgs.pydantic
            # pydantic-settings # need https://github.com/NixOS/nixpkgs/pull/244564
          ];
          pythonImportsCheck = [
            # can't check due to sandbox: "hwloc/linux: failed to find sysfs cpu topology directory, aborting linux discovery"
            # "llama_cpp"
          ];
        };
      in
      rec {
        packages.default = llama-cpp-python "default";
        packages.opencl = llama-cpp-python "opencl";
        packages.cuda = llama-cpp-python "cuda";
        packages.python = pkgs.python3.withPackages (ps: [
          packages.default
        ]);
        packages.python-opencl = pkgs.python3.withPackages (ps: [
          packages.opencl
        ]);
        packages.python-cuda = pkgs.python3.withPackages (ps: [
          packages.cuda
        ]);
        # requires GPU
        apps.default =
          let
            # The prompt asks, in Spanish: "You are Pablo Neruda. Write a
            # poem about the sun and the clouds."
            runpy = pkgs.writeText "run.py" ''
              from llama_cpp import Llama
              llm = Llama(model_path="${model}", n_ctx=4096, n_gpu_layers=41)
              response = llm("<s>[INST]<<SYS>>Eres Pablo Neruda <</SYS>> Escribe un poema sobre el sol y las nubes[/INST]", stop=["</s>"], max_tokens=9999)
              print(response)
            '';
            start = pkgs.writeShellScript "start.sh" ''
              ${packages.python-opencl}/bin/python ${runpy}
            '';
          in {
            type = "app";
            program = "${start}";
          };
      });
}
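
To consume this flake from another flake, a minimal sketch (the system and the dev-shell output here are illustrative; the gist URL is the one used in the commands below):

{
  inputs.nixpkgs.url = "github:nixos/nixpkgs/78058d810644f5ed276804ce7ea9e82d92bee293";
  inputs.llama-cpp-python.url = "git+https://gist.github.com/f525326eb70528f04b626fb9e01fcb58.git";

  outputs = { self, nixpkgs, llama-cpp-python }:
    let pkgs = nixpkgs.legacyPackages.x86_64-linux; in {
      # Dev shell whose python3 can import llama_cpp.
      devShells.x86_64-linux.default = pkgs.mkShell {
        packages = [ llama-cpp-python.packages.x86_64-linux.python ];
      };
    };
}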
mausch commented Sep 10, 2023

For example, to get a Python interpreter with llama-cpp-python available:

$ nix shell --refresh --no-write-lock-file git+https://gist.github.com/f525326eb70528f04b626fb9e01fcb58.git#python
$ python
>>> from llama_cpp import Llama
>>> llm = Llama(model_path="<PATH_TO_MODEL>")
>>> llm("my name is ")

Or, to run an example Llama 2 prompt on a GPU machine with OpenCL:

nix run --refresh --no-write-lock-file git+https://gist.github.com/f525326eb70528f04b626fb9e01fcb58.git

NixOS/nixpkgs#244564 is needed to build the OpenAI-compatible server (which is why pydantic-settings is commented out in the flake above).
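
Once that lands, starting the server should look roughly like this (llama_cpp.server and its --model flag come from upstream llama-cpp-python; the port and the request below are illustrative):

$ python -m llama_cpp.server --model <PATH_TO_MODEL>
$ curl http://localhost:8000/v1/completions \
    -H 'Content-Type: application/json' \
    -d '{"prompt": "my name is ", "max_tokens": 16}'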
