@td2sk
Created July 17, 2023 06:26

rinna.cpp Windows patch

Overview

A patch to get the llama.cpp port of rinna/japanese-gpt-neox-3.6b-instruction-ppo running on Windows.

Steps

  1. Check out the target commit of the patch base (see below)
  2. Apply the diff from this gist
  3. Build with cmake (mkdir build; cd build; cmake ..; cmake --build . --config Release)
  • quantize-gptneox, redpajama, and redpajama-chat are generated under build/bin/Release
  4. Convert rinna/japanese-gpt-neox-3.6b-instruction-ppo to a gptneox f16 model with examples\redpajama\scripts\convert_gptneox_to_ggml.py
  5. (If needed) quantize the model from step 4 with quantize-gptneox
  6. Use the model from step 4 or 5 with redpajama / redpajama-chat (a command sketch of the whole sequence follows below)
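
For reference, the whole sequence might look like the following in PowerShell. The repository URL, target commit, patch file name, output paths, the convert/quantize arguments, and the -m flag are assumptions for illustration, not values stated in this gist; check each tool's own usage before running.

# Get the patch base at the target commit and apply this gist's diff
# (<patch-base-url>, <target-commit>, and rinna-windows.diff are placeholders)
git clone <patch-base-url> redpajama.cpp
cd redpajama.cpp
git checkout <target-commit>
git apply rinna-windows.diff

# Build (step 3); the binaries end up under build\bin\Release
mkdir build; cd build; cmake ..; cmake --build . --config Release; cd ..

# Convert to a gptneox f16 model (argument layout assumed; see the script's help)
python examples\redpajama\scripts\convert_gptneox_to_ggml.py rinna/japanese-gpt-neox-3.6b-instruction-ppo <output-dir>

# Optionally quantize, then run (file names and the q4_0 type id "2" are assumed)
.\build\bin\Release\quantize-gptneox.exe <model-f16.bin> <model-q4_0.bin> 2
.\build\bin\Release\redpajama-chat.exe -m <model-q4_0.bin>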

Patch base

References

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 0973a3f..4c32104 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -36,4 +36,5 @@ else()
add_subdirectory(embedding)
add_subdirectory(save-load-state)
add_subdirectory(benchmark)
+ add_subdirectory(redpajama)
endif()
diff --git a/examples/redpajama/CMakeLists.txt b/examples/redpajama/CMakeLists.txt
new file mode 100644
index 0000000..e2e6a21
--- /dev/null
+++ b/examples/redpajama/CMakeLists.txt
@@ -0,0 +1,26 @@
+set(TARGET quantize-gptneox)
+add_executable(${TARGET} quantize-gptneox.cpp)
+target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_sources(${TARGET} PRIVATE gptneox.cpp ../../ggml.c)
+if(TARGET BUILD_INFO)
+ add_dependencies(${TARGET} BUILD_INFO)
+endif()
+
+set(TARGET redpajama)
+add_executable(${TARGET} main-redpajama.cpp)
+target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_sources(${TARGET} PRIVATE gptneox.cpp ../../ggml.c common-gptneox.cpp)
+if(TARGET BUILD_INFO)
+ add_dependencies(${TARGET} BUILD_INFO)
+endif()
+
+set(TARGET redpajama-chat)
+add_executable(${TARGET} main-redpajama-chat.cpp)
+target_link_libraries(${TARGET} PRIVATE llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_sources(${TARGET} PRIVATE gptneox.cpp ../../ggml.c common-gptneox.cpp)
+if(TARGET BUILD_INFO)
+ add_dependencies(${TARGET} BUILD_INFO)
+endif()
diff --git a/examples/redpajama/gptneox.cpp b/examples/redpajama/gptneox.cpp
index f7fcfc2..b112366 100644
--- a/examples/redpajama/gptneox.cpp
+++ b/examples/redpajama/gptneox.cpp
@@ -28,6 +28,7 @@
#include <atomic>
#include <mutex>
#include <sstream>
+#include <numeric>
// TODO: Add back in n_ctx (max_position_embeddings) to ggml model, it is currently hard-coded to 2048 max for llama
@@ -2824,11 +2825,11 @@ gptneox_token gptneox_str_to_token(struct gptneox_context * ctx, const char * st
}
gptneox_token gptneox_token_bos() {
- return 0;
+ return 2;
}
gptneox_token gptneox_token_eos() {
- return 0;
+ return 3;
}
// Varies depending on gptneox model, use gptneox_str_to_token instead
diff --git a/examples/redpajama/gptneox.h b/examples/redpajama/gptneox.h
index 1b1cfea..52b4dde 100644
--- a/examples/redpajama/gptneox.h
+++ b/examples/redpajama/gptneox.h
@@ -193,7 +193,7 @@ extern "C" {
// Special tokens
GPTNEOX_API gptneox_token gptneox_token_bos();
GPTNEOX_API gptneox_token gptneox_token_eos();
- // GPTNEOX_API gptneox_token gptneox_token_nl();
+ GPTNEOX_API gptneox_token gptneox_token_nl();
// TODO: improve the last_n_tokens interface ?
GPTNEOX_API gptneox_token gptneox_sample_top_p_top_k(
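
The gptneox_token_bos / gptneox_token_eos change above (0 → 2 and 0 → 3) presumably lines up with the special-token ids of the rinna tokenizer. One way to check them, as a separate sketch using Hugging Face transformers (not part of the patch; use_fast=False follows the rinna model card):

from transformers import AutoTokenizer

# Load the rinna tokenizer (sentencepiece-based; use_fast=False per the model card)
tok = AutoTokenizer.from_pretrained(
    "rinna/japanese-gpt-neox-3.6b-instruction-ppo", use_fast=False
)

# The ids printed here are what gptneox_token_bos()/gptneox_token_eos() should return
print("bos:", tok.bos_token, tok.bos_token_id)
print("eos:", tok.eos_token, tok.eos_token_id)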
td2sk commented Jul 17, 2023

Example run (rinna/japanese-gpt-neox-3.6b-instruction-ppo, no quantization, Ryzen 9 3950X):
[screenshot: rinna]
