Skip to content

Instantly share code, notes, and snippets.

@eric-unc
Created March 2, 2025 06:12
Show Gist options
  • Select an option

  • Save eric-unc/3e9a8af121e081b3907aa53dcb3441fa to your computer and use it in GitHub Desktop.

Select an option

Save eric-unc/3e9a8af121e081b3907aa53dcb3441fa to your computer and use it in GitHub Desktop.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 940c5916..7e9cf5ae 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,8 @@
cmake_minimum_required(VERSION 3.21)
+SET(CMAKE_C_COMPILER "/usr/local/opt/llvm@19/bin/clang")
+SET(CMAKE_CXX_COMPILER "/usr/local/opt/llvm@19/bin/clang++")
+
project(Ollama C CXX)
include(CheckLanguage)
@@ -20,6 +23,9 @@ set(GGML_BACKEND_DL ON)
set(GGML_BACKEND_SHARED ON)
set(GGML_SCHED_MAX_COPIES 4)
+set(GGML_METAL ON)
+set(GGML_METAL_EMBED_LIBRARY ON)
+
set(GGML_LLAMAFILE ON)
set(GGML_CUDA_PEER_MAX_BATCH_SIZE 128)
set(GGML_CUDA_GRAPHS ON)
diff --git a/discover/gpu_darwin.go b/discover/gpu_darwin.go
index dd5bf6e2..50b6bf0c 100644
--- a/discover/gpu_darwin.go
+++ b/discover/gpu_darwin.go
@@ -6,6 +6,12 @@ package discover
#cgo CFLAGS: -x objective-c
#cgo LDFLAGS: -framework Foundation -framework CoreGraphics -framework Metal
#include "gpu_info_darwin.h"
+
+#include <Metal/Metal.h>
+
+// isMetalSupported reports whether the system exposes a default Metal device.
+// Returns 1 when MTLCreateSystemDefaultDevice() yields a device, 0 otherwise.
+// Only line comments are used here: this C code is embedded in a Go block
+// comment (the cgo preamble), which a nested block comment would terminate.
+int isMetalSupported(void) {
+    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+    if (device == nil) {
+        return 0;
+    }
+#if !__has_feature(objc_arc)
+    // The "Create" function hands ownership of the device to the caller.
+    // Without ARC it must be released explicitly, otherwise every probe
+    // leaks one device reference.
+    [device release];
+#endif
+    return 1;
+}
*/
import "C"
@@ -23,7 +29,7 @@ const (
func GetGPUInfo() GpuInfoList {
mem, _ := GetCPUMem()
- if runtime.GOARCH == "amd64" {
+ if runtime.GOARCH == "amd64" && C.isMetalSupported() == 0 {
return []GpuInfo{
{
Library: "cpu",
diff --git a/llama/llama.cpp/src/llama-mmap.cpp b/llama/llama.cpp/src/llama-mmap.cpp
index a9932633..040fe93d 100644
--- a/llama/llama.cpp/src/llama-mmap.cpp
+++ b/llama/llama.cpp/src/llama-mmap.cpp
@@ -4,6 +4,8 @@
#include "ggml.h"
+#include <errno.h>
+
#include <cstring>
#include <climits>
#include <stdexcept>
diff --git a/llama/llama.go b/llama/llama.go
index a20f2357..25d6fefa 100644
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -60,14 +60,15 @@ func BackendInit() {
func PrintSystemInfo() string {
var compiler string
- switch C.get_compiler() {
+ /*switch C.get_compiler() {
case C.COMP_UNKNOWN:
compiler = "cgo(unknown_compiler)"
case C.COMP_GCC:
compiler = "cgo(gcc)"
case C.COMP_CLANG:
compiler = "cgo(clang)"
- }
+ }*/
+ compiler = "cgo(clang)"
return C.GoString(C.llama_print_system_info()) + compiler
}
diff --git a/ml/backend/ggml/ggml/src/CMakeLists.txt b/ml/backend/ggml/ggml/src/CMakeLists.txt
index 72b488dd..4ae893c6 100644
--- a/ml/backend/ggml/ggml/src/CMakeLists.txt
+++ b/ml/backend/ggml/ggml/src/CMakeLists.txt
@@ -301,6 +301,7 @@ else ()
ggml_add_cpu_backend_variant_impl("")
endif()
+
ggml_add_backend(BLAS)
ggml_add_backend(CANN)
ggml_add_backend(CUDA)
diff --git a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
index 318addec..74efff23 100644
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
@@ -60,13 +60,13 @@ static id<MTLDevice> ggml_backend_metal_device_acq(struct ggml_backend_metal_dev
if (ctx->mtl_device == nil) {
ctx->mtl_device = MTLCreateSystemDefaultDevice();
- ctx->has_simdgroup_reduction = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
+ ctx->has_simdgroup_reduction = false; // [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
ctx->has_simdgroup_reduction |= [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
- ctx->has_simdgroup_mm = [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
+ ctx->has_simdgroup_mm = false; // [ctx->mtl_device supportsFamily:MTLGPUFamilyApple7];
ctx->has_bfloat = [ctx->mtl_device supportsFamily:MTLGPUFamilyMetal3_GGML];
- ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
+ //ctx->has_bfloat |= [ctx->mtl_device supportsFamily:MTLGPUFamilyApple6];
#if defined(GGML_METAL_USE_BF16)
ctx->use_bfloat = ctx->has_bfloat;
@@ -2251,7 +2251,7 @@ static void ggml_metal_encode_node(
} else
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
- if ([device supportsFamily:MTLGPUFamilyApple7] &&
+ if (false /*[device supportsFamily:MTLGPUFamilyApple7]*/ &&
!ggml_is_transposed(src0) &&
!ggml_is_transposed(src1) &&
src1t == GGML_TYPE_F32 &&
@@ -2594,7 +2594,7 @@ static void ggml_metal_encode_node(
// TODO: for now, always use mat-vec kernels until we figure out how to improve the
// indirect matrix multiplication
// !!!
- if ([device supportsFamily:MTLGPUFamilyApple7] &&
+ if (false /*[device supportsFamily:MTLGPUFamilyApple7]*/ &&
ne00 % 32 == 0 && ne00 >= 64 &&
dst_rows > dst_rows_min) {
// some Metal matrix data types require aligned pointers
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment