Save eric-unc/3e9a8af121e081b3907aa53dcb3441fa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 940c5916..7e9cf5ae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,8 @@
 cmake_minimum_required(VERSION 3.21)
+SET(CMAKE_C_COMPILER "/usr/local/opt/llvm@19/bin/clang")
+SET(CMAKE_CXX_COMPILER "/usr/local/opt/llvm@19/bin/clang++")
+
 
 project(Ollama C CXX)
 include(CheckLanguage)
 
@@ -20,6 +23,9 @@ set(GGML_BACKEND_DL ON)
 set(GGML_BACKEND_SHARED ON)
 set(GGML_SCHED_MAX_COPIES 4)
 
+set(GGML_METAL ON)
+set(GGML_METAL_EMBED_LIBRARY ON)
+
 set(GGML_LLAMAFILE ON)
 set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
 set(GGML_CUDA_GRAPHS ON)
diff --git a/discover/gpu_darwin.go b/discover/gpu_darwin.go
index dd5bf6e2..50b6bf0c 100644
--- a/discover/gpu_darwin.go
+++ b/discover/gpu_darwin.go
@@ -6,6 +6,12 @@ package discover
 #cgo CFLAGS: -x objective-c
 #cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
 #include "gpu_info_darwin.h"
+
+#include <Metal/Metal.h>
+
+int isMetalSupported() {
+	return MTLCreateSystemDefaultDevice() != NULL;
+}
 */
 import "C"
 
@@ -23,7 +29,7 @@ const (
 
 func GetGPUInfo() GpuInfoList {
 	mem, _ := GetCPUMem()
-	if runtime.GOARCH == "amd64" {
+	if runtime.GOARCH == "amd64" && C.isMetalSupported() == 0 {
 		return []GpuInfo{
 			{
 				Library: "cpu",
diff --git a/llama/llama.cpp/src/llama-mmap.cpp b/llama/llama.cpp/src/llama-mmap.cpp
index a9932633..040fe93d 100644
--- a/llama/llama.cpp/src/llama-mmap.cpp
+++ b/llama/llama.cpp/src/llama-mmap.cpp
@@ -4,6 +4,8 @@
 
 #include "ggml.h"
 
+#include <errno.h>
+
 #include <cstring>
 #include <climits>
 #include <stdexcept>
diff --git a/llama/llama.go b/llama/llama.go
index a20f2357..25d6fefa 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -60,14 +60,15 @@ func BackendInit() {
 
 func PrintSystemInfo() string {
 	var compiler string
-	switch C.get_compiler() {
+	/*switch C.get_compiler() {
 	case C.COMP_UNKNOWN:
 		compiler = "cgo(unknown_compiler)"
 	case C.COMP_GCC:
 		compiler = "cgo(gcc)"
 	case C.COMP_CLANG:
 		compiler = "cgo(clang)"
-	}
+	}*/
+	compiler = "cgo(clang)"
 	return C.GoString(C.llama_print_system_info()) + compiler
 }
 
diff --git a/ml/backend/ggml/ggml/src/CMakeLists.txt b/ml/backend/ggml/ggml/src/CMakeLists.txt
index 72b488dd..4ae893c6 100644
--- a/ml/backend/ggml/ggml/src/CMakeLists.txt
+++ b/ml/backend/ggml/ggml/src/CMakeLists.txt
@@ -301,6 +301,7 @@ else ()
 
     ggml_add_cpu_backend_variant_impl("")
 endif()
+
 ggml_add_backend(BLAS)
 ggml_add_backend(CANN)
 ggml_add_backend(CUDA)
diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
index 318addec..74efff23 100644
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
@@ -60,13 +60,13 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev
     if (ctx->mtl_device == nil) {
         ctx->mtl_device = MTLCreateSystemDefaultDevice();
 
-        ctx->has_simdgroup_reduction = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
+        ctx->has_simdgroup_reduction = false; // [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
         ctx->has_simdgroup_reduction |= [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
 
-        ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
+        ctx->has_simdgroup_mm = false; // [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
 
         ctx->has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
-        ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
+        //ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
 
 #if defined(GGML_METAL_USE_BF16)
         ctx->use_bfloat = ctx->has_bfloat;
@@ -2251,7 +2251,7 @@ static void ggml_metal_encode_node(
                 } else
                 // for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
                 // AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
-                if ([device supportsFamily:MTLGPUFamilyApple7] &&
+                if (false /*[device supportsFamily:MTLGPUFamilyApple7]*/ &&
                     !ggml_is_transposed(src0) &&
                     !ggml_is_transposed(src1) &&
                     src1t == GGML_TYPE_F32 &&
@@ -2594,7 +2594,7 @@ static void ggml_metal_encode_node(
                 // TODO: for now, always use mat-vec kernels until we figure out how to improve the
                 //       indirect matrix multiplication
                 // !!!
-                if ([device supportsFamily:MTLGPUFamilyApple7] &&
+                if (false /*[device supportsFamily:MTLGPUFamilyApple7]*/ &&
                     ne00 % 32 == 0 && ne00 >= 64 &&
                     dst_rows > dst_rows_min) {
                     // some Metal matrix data types require aligned pointers
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.