@skanga
Last active May 30, 2024 23:11
JLLM 1.0 pre-release
#!/bin/bash
# Check if JAVA_HOME is set. If so then use it.
if [ -n "$JAVA_HOME" ]; then
JAVA_CMD="$JAVA_HOME/bin/java"
else
JAVA_CMD="java"
fi
# Check for java command existence
if ! $JAVA_CMD -version >/dev/null 2>&1; then
echo "Java could not be found. Please install Java 11 or later."
exit 1
fi
# Check for the correct java version
JAVA_VER_MAJOR=$( $JAVA_CMD -version 2>&1 | awk '/version/{gsub(/"/,"");split($3, arr, "."); print arr[1]}' )
if [ "$JAVA_VER_MAJOR" -lt 11 ]; then
echo "Java version is less than 11. Please upgrade to Java 11 or later."
exit 1
fi
# Check for HTTP and HTTPS proxy settings
if [ -n "$https_proxy" ] ; then
if echo "$https_proxy" | grep -q "@"; then # If the variable contains a username and password, it is parsed differently
PROXY_HOST=$(echo $https_proxy | sed 's/https\{0,1\}:\/\/.*@\(.*\):.*/\1/')
PROXY_PORT=$(echo $https_proxy | sed 's/https\{0,1\}:\/\/.*@.*:\(.*\)/\1/' | tr -d "/")
PROXY_USER=$(echo $https_proxy | sed 's/https\{0,1\}:\/\/\(.*\)@.*/\1/' | awk -F: '{print $1}')
PROXY_PASS=$(echo $https_proxy | sed 's/https\{0,1\}:\/\/\(.*\)@.*/\1/' | awk -F: '{print $2}')
else # Without a username and password, only the host and port need to be extracted
PROXY_HOST=$(echo $https_proxy | sed 's/https\{0,1\}:\/\/\(.*\):.*/\1/')
PROXY_PORT=$(echo $https_proxy | sed 's/https\{0,1\}:\/\/.*:\(.*\)/\1/' | tr -d "/")
fi
fi
# Check if proxy host and port are set
if [[ -n "${PROXY_HOST}" && -n "${PROXY_PORT}" ]]; then
# Initialize proxy environment variable
PROXY_ENV="-Dhttps.proxyHost=${PROXY_HOST} -Dhttps.proxyPort=${PROXY_PORT}"
# Check if proxy user and password are set
if [[ -n "${PROXY_USER}" && -n "${PROXY_PASS}" ]]; then
# Append proxy user and password to the proxy environment variable
PROXY_ENV="${PROXY_ENV} -Dhttps.proxyUser=${PROXY_USER} -Dhttps.proxyPassword=${PROXY_PASS}"
fi
# Print the proxy environment variable
echo "Proxy environment variable: ${PROXY_ENV}"
fi
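# For example (hypothetical proxy): https_proxy=http://proxy.example.com:3128 produces
# PROXY_ENV="-Dhttps.proxyHost=proxy.example.com -Dhttps.proxyPort=3128"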
# Check for JLLM path location
JLLM_PATH=$(dirname "$0")
if [[ ${JLLM_PATH} == *" "* ]]; then
echo "ERROR: The JLLM path contains spaces. Please move it to a location without spaces."
exit 1
fi
# Run the java program
echo "Running jllm with Java $JAVA_VER_MAJOR"
"$JAVA_CMD" -DJLLM=${JLLM_PATH} ${PROXY_ENV} ${JLLM_PATH}/jllm.java "$@"
@echo off
setlocal enabledelayedexpansion
rem Set the directory of this batch file
set "batchDir=%~dp0"
rem Find the Java executable in the PATH
set "javaExe="
for %%A in (java.exe) do (
set "javaExe=%%~$PATH:A"
)
if not exist "%javaExe%" (
echo Java executable not found in PATH.
exit /b 1
)
rem Set the full path of the Java source file
set "javaSourceFile=%batchDir%jllm.java"
rem Compile and run the Java source file
"%javaExe%" %javaSourceFile% %*
@echo off
setlocal enabledelayedexpansion
:: Check for HTTP and HTTPS proxy settings
set http_proxy=%http_proxy%
set https_proxy=%https_proxy%
if not "%http_proxy%"=="" if not "%https_proxy%"=="" (
rem Proxy settings found, create the PROXY_ENV variable
for /f "tokens=2 delims=@" %%a in ("%http_proxy%") do (
for /f "tokens=1,2 delims=:" %%b in ("%%a") do (
set "http_proxy_host=%%b"
set "http_proxy_port=%%c"
)
)
for /f "tokens=2 delims=@" %%a in ("%https_proxy%") do (
for /f "tokens=1,2 delims=:" %%b in ("%%a") do (
set "https_proxy_host=%%b"
set "https_proxy_port=%%c"
)
)
set "PROXY_ENV=-Dhttp.proxyHost=!http_proxy_host! -Dhttp.proxyPort=!http_proxy_port! -Dhttps.proxyHost=!https_proxy_host! -Dhttps.proxyPort=!https_proxy_port!"
echo Proxy settings found. Proxy environment variable: !PROXY_ENV!
) else (
echo No proxy settings found.
)
:: Check for JLLM path location
SET JLLM_PATH=%~dp0
if not "%JLLM_PATH%"=="%JLLM_PATH: =%" (
echo ERROR: The JLLM path contains spaces. Please move it to a location without spaces
exit /b 1
)
:: Run the java program (default to the java launcher on the PATH if JAVA_CMD has not been set)
if not defined JAVA_CMD set "JAVA_CMD=java"
echo Running jllm
%JAVA_CMD% -DJLLM=%JLLM_PATH% %PROXY_ENV% "%~dp0jllm.java" %*
endlocal
import java.io.*;
import java.net.*;
import java.nio.file.*;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
// TODO:
// Download and parse the first 2mb of the gguf to detect metadata (ScanGGUF.java in JavaLlamaExample project)
// Ask/query/infer/inference command to call a running model
// Info command to launch model card
// Test on MacOS
// Should we add/support any Non HuggingFace models? Are there any? Where? Currently we download all models from 🤗 only
// Should we add support for huge models which are split into multiple files like Falcon, Goliath & MegaDolphin?
// Much more & better error checking/reporting
// More Unit tests
// ONGOING - Keep up with latest models and add them to the list
/*
To run with a proxy
set JAVA_TOOL_OPTIONS=-Dhttp.proxyHost=www-proxy-hqdc.us.oracle.com -Dhttp.proxyPort=80 -Dhttps.proxyHost=www-proxy-hqdc.us.oracle.com -Dhttps.proxyPort=80
Some of the quantization methods in llama.cpp
GGML_TYPE_Q2_K - "type-1" 2-bit quantization in super-blocks containing 16 blocks, each block having 16 weights. Block scales and mins are quantized with 4 bits. This ends up effectively using 2.5625 bits per weight (bpw)
GGML_TYPE_Q3_K - "type-0" 3-bit quantization in super-blocks containing 16 blocks, each block having 16 weights. Scales are quantized with 6 bits. This ends up using 3.4375 bpw.
GGML_TYPE_Q4_K - "type-1" 4-bit quantization in super-blocks containing 8 blocks, each block having 32 weights. Scales and mins are quantized with 6 bits. This ends up using 4.5 bpw.
GGML_TYPE_Q5_K - "type-1" 5-bit quantization. Same super-block structure as GGML_TYPE_Q4_K resulting in 5.5 bpw
GGML_TYPE_Q6_K - "type-0" 6-bit quantization. Super-blocks with 16 blocks, each block having 16 weights. Scales are quantized with 8 bits. This ends up using 6.5625 bpw
GGML_TYPE_Q8_K - "type-0" 8-bit quantization. Only used for quantizing intermediate results. The difference from the existing Q8_0 is that the block size is 256. All 2-6 bit dot products are implemented for this quantization type.
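As a worked example for GGML_TYPE_Q4_K (assuming llama.cpp's k-quant layout, where each super-block also stores one 16-bit scale and one 16-bit min):
8 blocks x 32 weights = 256 weights at 4 bits = 1024 bits, plus 8 scales and 8 mins at 6 bits each = 96 bits, plus 2 x 16 = 32 bits for the super-block,
giving 1152 bits / 256 weights = 4.5 bpw, matching the figure above.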
Similar projects:
llm - https://github.com/simonw/llm
localllm - https://github.com/GoogleCloudPlatform/localllm
glai - https://github.com/laelhalawani/glai
*/
public class jllm {
static boolean IS_WINDOWS = System.getProperty("os.name").startsWith("Windows");
static int DEFAULT_PORT = 8080;
static int BUFFER_SIZE = 1024 * 100;
static String DEFAULT_QUANT = "Q4_K_M";
static String DEFAULT_FILE_EXT = "gguf";
static String LLAMA_EXEC = "llamafile";
static String LLAMA_PARAMS = "--model %s --host %s --port %s --log-disable -ngl 9999";
// NOTE: The param --log-disable is misleading because it does not disable logging, it only redirects the log to stdout
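// For illustration only (the actual launch is handled by ModelLauncher): with a hypothetical model at /home/alice/.jllm/llama-2-7b.Q4_K_M.gguf
// the parameter string above would format to roughly:
//   --model /home/alice/.jllm/llama-2-7b.Q4_K_M.gguf --host localhost --port 8080 --log-disable -ngl 9999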
// Initialize JLLM_DIR from environment variable called JLLM
static String JLLM_DIR = System.getenv("JLLM");
// If missing, fall back to the JLLM system property, then to .jllm in the user's home dir
static {
if (JLLM_DIR == null) // No environment variable called JLLM
JLLM_DIR = System.getProperty("JLLM"); // Read system property JLLM
if (JLLM_DIR != null && Files.isDirectory(Paths.get(JLLM_DIR + File.separator + ".jllm")))
JLLM_DIR = JLLM_DIR + File.separator + ".jllm"; // Found .jllm via system property. Use it.
else // Finally, default to user's home dir
JLLM_DIR = System.getProperty("user.home") + File.separator + ".jllm";
}
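// For example (hypothetical paths): if the launcher passes -DJLLM=/opt/jllm (and the JLLM environment variable is unset) and /opt/jllm/.jllm exists,
// models are stored under /opt/jllm/.jllm; otherwise JLLM_DIR defaults to <user.home>/.jllm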
static SortedMap<String, String> MODEL_MAP = new TreeMap<>();
static {
MODEL_MAP.put("airoboros", "TheBloke/Airoboros-L2-70B-3.1.2-GGUF");
MODEL_MAP.put("alfred", "TheBloke/alfred-40B-1023-GGUF");
MODEL_MAP.put("artigenz-coder", "zarugeos/Artigenz-Coder-DS-6.7B-Q4_K_M-GGUF/artigenz-coder-ds-6.7b-q4_k_m.gguf");
MODEL_MAP.put("athena-llamacoder3", "mradermacher/Athena-llama-Coder-3-8B-GGUF/Athena-llama-Coder-3-8B.Q4_K_M.gguf");
MODEL_MAP.put("aya23", "bartowski/aya-23-8B-GGUF/aya-23-8B-Q4_K_M.gguf");
MODEL_MAP.put("aya23-8b", "legraphista/aya-23-8B-IMat-GGUF/aya-23-8B.Q4_K_S.gguf");
MODEL_MAP.put("aya23-35b", "bartowski/aya-23-35B-GGUF/aya-23-35B-Q4_K_M.gguf");
MODEL_MAP.put("aya23-35b-imat", "legraphista/aya-23-35B-IMat-GGUF/aya-23-35B.Q4_K_S.gguf");
MODEL_MAP.put("aya101", "kcoopermiller/aya-101-GGUF/aya-101.Q4_K.gguf");
MODEL_MAP.put("bagel", "mradermacher/bagel-8b-v1.0-i1-GGUF/bagel-8b-v1.0.i1-Q4_K_M.gguf");
MODEL_MAP.put("bakllava", "abetlen/BakLLaVA-1-GGUF/bakllava-1.Q6_K.gguf/mmproj-model-f16.gguf");
MODEL_MAP.put("bakllava-mistral", "AI-Engine/BakLLaVA1-MistralLLaVA-7B-GGUF/BakLLaVA1-MistralLLaVA-7B.q5_K_M.gguf/BakLLaVA1-clip-mmproj-model-f16.gguf");
MODEL_MAP.put("barcenas", "mradermacher/Barcenas-9b-GGUF/Barcenas-9b.Q8_0.gguf");
MODEL_MAP.put("bigmaid", "mradermacher/BigMaid-20B-v1.0-GGUF/BigMaid-20B-v1.0.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("biomistral", "BioMistral/BioMistral-7B-GGUF/ggml-model-Q4_K_M.gguf");
MODEL_MAP.put("biomistral-zephyr", "BioMistral/BioMistral-7B-Zephyr-Beta-SLERP-GGUF/ggml-model-Q4_K_M.gguf");
MODEL_MAP.put("bling-phi3", "llmware/bling-phi-3-gguf/bling-phi-3.gguf");
MODEL_MAP.put("bling-stablelm", "llmware/bling-stablelm-3b-gguf/bling-stablelm.gguf");
MODEL_MAP.put("bling-stablelm-3b", "maddes8cht/llmware-bling-stable-lm-3b-4e1t-v0-gguf/llmware-bling-stable-lm-3b-4e1t-v0-Q4_K_M.gguf");
MODEL_MAP.put("blue-orchid", "nakodanei/Blue-Orchid-2x7b_GGUF/Blue-Orchid-2x7b-Q4_K_M.gguf");
MODEL_MAP.put("borealis", "mradermacher/Borealis-10.7B-GGUF/Borealis-10.7B.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("bunny", "BAAI/Bunny-v1_0-4B-gguf/ggml-model-Q4_K_M.gguf");
MODEL_MAP.put("bunny-llama3", "BAAI/Bunny-Llama-3-8B-V-gguf/ggml-model-Q4_K_M.gguf");
MODEL_MAP.put("cabrallama3", "mradermacher/CabraLlama3-8b-32k-GGUF/CabraLlama3-8b-32k.Q4_K_M.gguf");
MODEL_MAP.put("capytess", "TheBloke/CapyTessBorosYi-34B-200K-DARE-Ties-GGUF");
MODEL_MAP.put("cat-llama3", "mradermacher/Cat-Llama-3-70B-instruct-GGUF/Cat-Llama-3-70B-instruct.Q4_K_M.gguf");
MODEL_MAP.put("cat-llama3-i1", "mradermacher/Cat-Llama-3-70B-instruct-i1-GGUF/Cat-Llama-3-70B-instruct.i1-Q4_K_M.gguf");
MODEL_MAP.put("causal", "CausalLM/72B-preview-GGUF/72b-q4_k_m.gguf");
MODEL_MAP.put("chimera-llama", "PrunaAI/ChimeraLlama-3-8B-v2-GGUF-smashed/ChimeraLlama-3-8B-v2.Q4_K_M.gguf");
MODEL_MAP.put("chupacabra", "mradermacher/Chupacabra-8x7B-MoE-i1-GGUF/Chupacabra-8x7B-MoE.i1-Q4_K_M.gguf");
MODEL_MAP.put("claude2-alpaca", "TheBloke/claude2-alpaca-7B-GGUF");
MODEL_MAP.put("claude2-alpaca-13b", "TheBloke/claude2-alpaca-13B-GGUF");
MODEL_MAP.put("codebooga", "TheBloke/CodeBooga-34B-v0.1-GGUF");
MODEL_MAP.put("codegen", "securecodegen/codegen25-7b-gguf/codegen25-7b.gguf");
MODEL_MAP.put("codegen25", "sokada/codegen25-7b-multi-gguf-with-dummy-tokenizer/ggml-model-f16.gguf");
MODEL_MAP.put("codegemma-1.1-7b-it", "bartowski/codegemma-1.1-7b-it-GGUF/codegemma-1.1-7b-it-Q5_K_M.gguf");
MODEL_MAP.put("codegemma-2b", "bartowski/codegemma-2b-GGUF/codegemma-2b-Q4_K_M.gguf");
MODEL_MAP.put("codegemma-7b", "MaziyarPanahi/codegemma-7b-GGUF");
MODEL_MAP.put("codegemma-7b-it", "bartowski/codegemma-7b-it-GGUF/codegemma-7b-it-Q4_K_M.gguf");
MODEL_MAP.put("codegemma-2b-smashed", "PrunaAI/codegemma-2b-GGUF-smashed/codegemma-2b.Q4_K_M.gguf");
MODEL_MAP.put("codegemma-7b-smashed", "PrunaAI/codegemma-7b-GGUF-smashed/codegemma-7b.Q4_K_M.gguf");
MODEL_MAP.put("codegemma-7b-sql", "Fduv/fine_tuned_text-to-sql_codegemma-7b-v0.2_GGUF_q4_k_m/fine_tuned_text-to-sql_codegemma-7b-q4_k_m.gguf");
MODEL_MAP.put("codellama", "TheBloke/CodeLlama-7B-GGUF");
MODEL_MAP.put("codellama-7b", "lmstudio-community/CodeLlama-7B-KStack-GGUF/CodeLlama-7B-KStack-Q4_K_M.gguf");
MODEL_MAP.put("codellama-7b-clean", "lmstudio-community/CodeLlama-7B-KStack-clean-GGUF/CodeLlama-7B-KStack-clean-Q4_K_M.gguf");
MODEL_MAP.put("codellama-34b", "TheBloke/CodeLlama-34B-Instruct-GGUF");
MODEL_MAP.put("codellama-70b", "TheBloke/CodeLlama-70B-Instruct-GGUF");
MODEL_MAP.put("codellama3-unsloth", "ALI-B/codellama3-v3-gguf/codellama3-v3-gguf-unsloth.Q4_K_M.gguf");
MODEL_MAP.put("codellama3-8b", "bartowski/Code-Llama-3-8B-GGUF/Code-Llama-3-8B-Q4_K_M.gguf");
MODEL_MAP.put("codellama3-8b-smashed", "PrunaAI/ajibawa-2023-Code-Llama-3-8B-GGUF-smashed/Code-Llama-3-8B.Q4_K_M.gguf");
MODEL_MAP.put("codellama3-8b-unsloth", "ALI-B/codellama3-8b-gguf/codellama3-8b-gguf-unsloth.Q4_K_M.gguf");
MODEL_MAP.put("codeninja", "TheBloke/CodeNinja-1.0-OpenChat-7B-GGUF");
MODEL_MAP.put("codeqwen", "Qwen/CodeQwen1.5-7B-Chat-GGUF/codeqwen-1_5-7b-chat-q4_k_m.gguf");
MODEL_MAP.put("codeqwen-smashed", "PrunaAI/CodeQwen1.5-7B-Chat-GGUF-smashed/CodeQwen1.5-7B-Chat.Q4_K_M.gguf");
MODEL_MAP.put("codeup", "TheBloke/CodeUp-Alpha-13B-HF-GGUF");
MODEL_MAP.put("command-r", "bartowski/c4ai-command-r-v01-GGUF/c4ai-command-r-v01-Q4_K_M.gguf");
MODEL_MAP.put("command-r-imat", "dranger003/c4ai-command-r-v01-iMat.GGUF/ggml-c4ai-command-r-v01-q4_k_m.gguf");
MODEL_MAP.put("command-r-plus", "ehristoforu/c4ai-command-r-plus-Q2_K-GGUF/c4ai-command-r-plus.Q2_K.gguf");
MODEL_MAP.put("coomand-r-35b", "TheDrummer/Coomand-R-35B-v1-GGUF/Coomand-R-35B-v1-Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("cyber-llama3", "mradermacher/cyber-risk-llama-3-8b-GGUF");
MODEL_MAP.put("daredevil", "mradermacher/Daredevil-8B-GGUF/Daredevil-8B.Q8_0.gguf");
MODEL_MAP.put("daybreak-mixtral", "mradermacher/daybreak-mixtral-8x7b-v1.0-hf-GGUF/daybreak-mixtral-8x7b-v1.0-hf.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("dbrx-instruct", "dranger003/dbrx-instruct-iMat.GGUF/ggml-dbrx-instruct-16x12b-iq2_xs.gguf");
MODEL_MAP.put("deci", "Deci/DeciLM-7B-instruct-GGUF/decilm-7b-uniform-gqa-q8_0.gguf");
MODEL_MAP.put("deepmind", "mradermacher/DEEP_MIND-GGUF/DEEP_MIND.Q4_K_M.gguf");
MODEL_MAP.put("deepmoney", "TheBloke/deepmoney-34b-200k-base-GGUF");
MODEL_MAP.put("deepmoney-chat", "TheBloke/deepmoney-34b-200k-chat-evaluator-GGUF");
MODEL_MAP.put("deepseek", "TheBloke/deepseek-llm-67b-base-GGUF");
MODEL_MAP.put("deepseek-7b", "second-state/Deepseek-LLM-7B-Chat-GGUF/deepseek-llm-7b-chat-Q5_K_M.gguf");
MODEL_MAP.put("deepseek-coder", "lmstudio-community/deepseek-coder-6.7B-kexer-GGUF/deepseek-coder-6.7B-kexer-Q4_K_M.gguf");
MODEL_MAP.put("deepseek-coder-instruct", "TheBloke/deepseek-coder-6.7B-instruct-GGUF");
MODEL_MAP.put("deepseek-coder-kexer", "bartowski/deepseek-coder-6.7B-kexer-GGUF/deepseek-coder-6.7B-kexer-Q4_K_M.gguf");
MODEL_MAP.put("deepseek-sql", "ukung/DeepSeek-SQL-Expert-GGUF/DeepSeek-SQL-Expert-q4_k_m.gguf");
MODEL_MAP.put("dolphin-llama3", "cognitivecomputations/dolphin-2.9-llama3-8b-gguf/dolphin-2.9-llama3-8b-q4_K_M.gguf");
MODEL_MAP.put("dolphin-mistral", "TheBloke/dolphin-2.0-mistral-7B-GGUF");
MODEL_MAP.put("dolphin-mistral-7b", "second-state/dolphin-2.6-mistral-7B-GGUF/dolphin-2.6-mistral-7b-Q5_K_M.gguf");
MODEL_MAP.put("dolphin-mixtral-8x7b", "TheBloke/dolphin-2.5-mixtral-8x7b-GGUF");
MODEL_MAP.put("dolphin-phi", "TheBloke/dolphin-2_6-phi-2-GGUF");
MODEL_MAP.put("dolphin-phi-smashed", "PrunaAI/Dolphin-2.9.1-Phi-3-Kensho-4.5B-GGUF-smashed/Dolphin-2.9.1-Phi-3-Kensho-4.5B.Q4_K_M.gguf");
MODEL_MAP.put("dolphin-yi", "bartowski/dolphin-2.9.1-yi-1.5-9b-GGUF/dolphin-2.9.1-yi-1.5-9b-Q4_K_M.gguf");
MODEL_MAP.put("dpopenhermes", "TheBloke/DPOpenHermes-7B-GGUF/dpopenhermes-7b.Q4_K_M.gguf");
MODEL_MAP.put("dpopenhermes-v2", "TheBloke/DPOpenHermes-7B-v2-GGUF/dpopenhermes-7b-v2.Q4_K_M.gguf");
MODEL_MAP.put("dpopenhermes-mistral", "MaziyarPanahi/DPOpenHermes-7B-v2-Mistral-7B-Instruct-v0.2-slerp-GGUF/DPOpenHermes-7B-v2-Mistral-7B-Instruct-v0.2-slerp.Q4_K_M.gguf");
MODEL_MAP.put("dpopenhermes-mistral-instruct", "MaziyarPanahi/DPOpenHermes-7B-v2-Mistral-7B-Instruct-v0.1-GGUF/DPOpenHermes-7B-v2-Mistral-7B-Instruct-v0.1.Q4_K_M.gguf");
MODEL_MAP.put("dragon-falcon", "maddes8cht/llmware-dragon-falcon-7b-v0-gguf/llmware-dragon-falcon-7b-v0-Q4_K_M.gguf");
MODEL_MAP.put("dragon-mistral", "llmware/dragon-mistral-7b-v0/dragon-mistral-7b-q4_k_m.gguf");
MODEL_MAP.put("dragon-yi", "llmware/dragon-yi-6b-v0/dragon-yi-6b-q4_k_m.gguf");
MODEL_MAP.put("dr-samantha", "TheBloke/Dr_Samantha-7B-GGUF");
MODEL_MAP.put("einstein-llama3", "PrunaAI/Weyaxi-Einstein-v6.1-Llama3-8B-GGUF-smashed/Einstein-v6.1-Llama3-8B.Q4_K_M.gguf");
// https://future.mozilla.org/news/llamafiles-for-embeddings-in-local-rag-applications/
MODEL_MAP.put("embedding-sfr-mistral", "Mozilla/SFR-Embedding-Mistral-llamafile/ggml-sfr-embedding-mistral-f16.llamafile");
MODEL_MAP.put("embedding-e5-mistral", "Mozilla/e5-mistral-7b-instruct/e5-mistral-7b-instruct-Q5_K_M.llamafile");
MODEL_MAP.put("embedding-mxbai-large", "Mozilla/mxbai-embed-large-v1-llamafile/mxbai-embed-large-v1-f16.llamafile");
MODEL_MAP.put("estigiax-tinyllama", "franciscobdl/EstigiaxTinyLlama1.1-Q4_K_M-GGUF/estigiaxtinyllama1.1.Q4_K_M.gguf");
MODEL_MAP.put("emo", "PrunaAI/EMO-1.5B-GGUF-smashed/EMO-1.5B.Q4_K_M.gguf");
MODEL_MAP.put("everyone-coder", "TheBloke/Everyone-Coder-4x7b-Base-GGUF/everyone-coder-4x7b-base.Q4_K_M.gguf");
MODEL_MAP.put("everyone-coder-33b", "TheBloke/Everyone-Coder-33B-Base-GGUF/everyone-coder-33b-base.Q4_K_M.gguf");
MODEL_MAP.put("everythinglm", "TheBloke/EverythingLM-13B-16K-GGUF");
MODEL_MAP.put("falcon", "maddes8cht/tiiuae-falcon-7b-instruct-gguf/tiiuae-falcon-7b-instruct-Q4_K_M.gguf");
MODEL_MAP.put("falcon2", "DevQuasar/falcon2-11B-GGUF/falcon2-11B.Q4_0.gguf");
MODEL_MAP.put("falcon2-5.5b", "ssmits/Falcon2-5.5B-multilingual-GGUF");
MODEL_MAP.put("faro-yi", "bartowski/Faro-Yi-9B-DPO-GGUF/Faro-Yi-9B-DPO-Q4_K_M.gguf");
MODEL_MAP.put("faro-yi-9b", "ggalmeida0/Faro-Yi-9B-DPO-Q8_0-GGUF/faro-yi-9b-dpo-q8_0.gguf");
MODEL_MAP.put("fimbulvetr", "mradermacher/Fimbulvetr-11B-v2-i1-GGUF/Fimbulvetr-11B-v2.i1-Q4_K_M.gguf");
MODEL_MAP.put("finance", "TheBloke/finance-LLM-GGUF");
MODEL_MAP.put("finance-13b", "TheBloke/finance-LLM-13B-GGUF");
MODEL_MAP.put("finance-chat", "TheBloke/finance-chat-GGUF");
MODEL_MAP.put("franken-mistral", "mradermacher/Franken-Mistral-Merlinite-Maid-10B-GGUF/Franken-Mistral-Merlinite-Maid-10B.Q4_K_M.gguf");
//MODEL_MAP.put("goliath", "TheBloke/goliath-120b-GGUF"); // Split models not yet supported
//MODEL_MAP.put("gemma-2b", "google/gemma-2b-it/gemma-2b-it.gguf"); // Needs auth - not yet supported
//MODEL_MAP.put("gemma-7b", "google/gemma-7b-it/gemma-7b-it.gguf"); // Needs auth - not yet supported
// For gemma also see https://huggingface.co/google/gemma-7b-it/discussions/38
//MODEL_MAP.put("gemma-2b", "google/gemma-2b-it/gemma-2b-it.gguf"); // Needs auth - not yet supported
//MODEL_MAP.put("gemma-7b", "google/gemma-7b-it/gemma-7b-it.gguf"); // Needs auth - not yet supported
MODEL_MAP.put("gemma-2b", "LoneStriker/gemma-2b-GGUF/gemma-2b-Q4_K_M.gguf");
MODEL_MAP.put("gemma-2b-hindi", "jayshah5696/gemma_2b_hindi_gguf/gemma-2b-hindi-Q6_K.gguf");
MODEL_MAP.put("gemma-2b-indo", "ukung/Gemma2B-Indo-FineTune-GGUF/Gemma2B-Indo-FineTune-q4_k_m.gguf");
MODEL_MAP.put("gemma-2b-it", "second-state/Gemma-2b-it-GGUF/gemma-2b-it-Q5_K_M.gguf");
MODEL_MAP.put("gemma-2b-it-smashed", "PrunaAI/gemma-2b-it-GGUF-smashed/gemma-2b-it.Q4_K_M.gguf");
MODEL_MAP.put("gemma-2b-smashed", "PrunaAI/gemma-2b-GGUF-smashed/gemma-2b.Q4_K_M.gguf");
MODEL_MAP.put("gemma-2b-txt2sql", "ukung/Gemma2B-Text-To-SQL-Expert-GGUF/Gemma2B-Text-To-SQL-Expert-q4_k_m.gguf");
MODEL_MAP.put("gemma-2b-zephyr", "MoMonir/gemma-2b-zephyr-dpo-GGUF");
MODEL_MAP.put("gemma-7b", "LoneStriker/gemma-7b-GGUF/gemma-7b-Q4_K_M.gguf");
MODEL_MAP.put("gemma-7b-it", "LoneStriker/gemma-7b-it-GGUF/gemma-7b-it-Q4_K_M.gguf");
MODEL_MAP.put("gemmalpaca-2b", "LoneStriker/Gemmalpaca-2B-GGUF/Gemmalpaca-2B-Q4_K_M.gguf");
MODEL_MAP.put("gemmalpaca-7b", "LoneStriker/Gemmalpaca-7B-GGUF/Gemmalpaca-7B-Q4_K_M.gguf");
MODEL_MAP.put("goat", "mradermacher/GOAT-70B-Storytelling-i1-GGUF/GOAT-70B-Storytelling.i1-Q4_K_M.gguf");
MODEL_MAP.put("gpt4all", "maddes8cht/nomic-ai-gpt4all-falcon-gguf/nomic-ai-gpt4all-falcon-Q4_K_M.gguf");
MODEL_MAP.put("gradientputri-megamix", "s3nh/gradientputri-MegaMix-S1-13B-GGUF/gradientputri-MegaMix-S1-13B.Q4_K_M.gguf");
MODEL_MAP.put("granite", "NikolayKozloff/granite-8b-code-base-Q8_0-GGUF/granite-8b-code-base.Q8_0.gguf");
MODEL_MAP.put("granite-20b", "bartowski/granite-20b-code-instruct-GGUF/granite-20b-code-instruct-Q4_K_M.gguf");
MODEL_MAP.put("granite-20b-code", "cobrakenji/granite-20b-code-base-GGUF");
MODEL_MAP.put("granite-34b", "hiddenblue1/granite-34b-code-instruct-Q4_K_M-GGUF/granite-34b-code-instruct.Q4_K_M.gguf");
MODEL_MAP.put("granite-34b-code", "ibm-granite/granite-34b-code-base-GGUF");
MODEL_MAP.put("granite-34b-instruct", "ibm-granite/granite-34b-code-instruct-GGUF");
MODEL_MAP.put("granite-code", "akparmar/granite-8b-code-instruct-GGUF-quantized/Q4_K_M.gguf");
MODEL_MAP.put("granite-instruct", "YorkieOH10/granite-8b-code-instruct-Q4_K_M-GGUF/granite-8b-code-instruct.Q4_K_M.gguf");
MODEL_MAP.put("granite-instruct-34b", "cobrakenji/granite-34b-code-instruct-Q4_K_M-GGUF/granite-34b-code-instruct.Q4_K_M.gguf");
MODEL_MAP.put("granite-code-base", "YorkieOH10/granite-20b-code-base-Q8_0-GGUF/granite-20b-code-base.Q8_0.gguf");
MODEL_MAP.put("granite-code-instruct", "cobrakenji/granite-20b-code-instruct-Q5_K_M-GGUF/granite-20b-code-instruct.Q5_K_M.gguf");
MODEL_MAP.put("guanaco", "mradermacher/guanaco-65b-i1-GGUF/guanaco-65B-HF.i1-Q4_K_M.gguf");
MODEL_MAP.put("gurullama3", "wifibaby4u/Guru-Llama-3-8B-Chat-GGUF/Guru-Llama-3-8B-Chat.q6_k.gguf");
MODEL_MAP.put("halu-llama3", "mradermacher/Halu-8B-Llama3-v0.3-i1-GGUF/Halu-8B-Llama3-v0.3.i1-Q4_K_M.gguf");
MODEL_MAP.put("hermes-llama3", "NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf");
MODEL_MAP.put("hermesstar-llama3", "RichardErkhov/Ba2han_-_HermesStar-OrcaWind-Synth-11B-gguf/HermesStar-OrcaWind-Synth-11B.Q4_K_M.gguf");
MODEL_MAP.put("icelatte", "mradermacher/IceLatteRP-7b-GGUF/IceLatteRP-7b.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("internlm2", "bartowski/internlm2-math-plus-7b-GGUF/internlm2-math-plus-7b-Q4_K_M.gguf");
MODEL_MAP.put("internlm2-20b", "bartowski/internlm2-math-plus-20b-GGUF/internlm2-math-plus-20b-Q4_K_M.gguf");
MODEL_MAP.put("jamba", "Severian/Jamba-900M-GGUF/jamba-900M.bf16.gguf");
MODEL_MAP.put("josie", "mradermacher/JOSIExMistral-7B-Instruct-v0.2-GGUF/JOSIExMistral-7B-Instruct-v0.2.Q4_K_M.gguf");
MODEL_MAP.put("kichtral", "mradermacher/Kichtral-7B-v0.01-GGUF/Kichtral-7B-v0.01.Q4_K_M.gguf");
MODEL_MAP.put("law", "TheBloke/law-LLM-GGUF");
MODEL_MAP.put("law-13b", "TheBloke/law-LLM-13B-GGUF");
MODEL_MAP.put("law-chat", "TheBloke/law-chat-GGUF");
MODEL_MAP.put("llama-medx", "JL42/Llama-medx_v0-GGUF");
MODEL_MAP.put("llama-pro", "TheBloke/LLaMA-Pro-8B-GGUF");
MODEL_MAP.put("llama2", "second-state/Llama-2-7B-Chat-GGUF/Llama-2-7b-chat-hf-Q5_K_M.gguf");
MODEL_MAP.put("llama2-7b", "TheBloke/Llama-2-7B-GGUF");
MODEL_MAP.put("llama2-13b", "TheBloke/Llama-2-13B-GGUF");
MODEL_MAP.put("llama2-13b-erebus", "KoboldAI/LLaMA2-13B-Erebus-v3-GGUF/llama2-13b-erebus-v3.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("llama2-verilog", "silverliningeda/llama-2-7b-silverliningeda-verilog-codegen-GGUF/llama-2-7b-silverliningeda-verilog-codegen.gguf.q4_k_m.bin");
MODEL_MAP.put("llama2-chinese", "TheBloke/Chinese-Llama-2-7B-GGUF");
MODEL_MAP.put("llama2-megacode2", "TheBloke/Llama2-13B-MegaCode2-OASST-GGUF");
MODEL_MAP.put("llama2-orca", "TheBloke/Llama-2-70B-Orca-200k-GGUF");
MODEL_MAP.put("llama2-psyfighter", "KoboldAI/LLaMA2-13B-Psyfighter2-GGUF/LLaMA2-13B-Psyfighter2.Q4_K_M.gguf");
MODEL_MAP.put("llama2-uncensored", "TheBloke/Luna-AI-Llama2-Uncensored-GGUF");
MODEL_MAP.put("llama3", "SanctumAI/Meta-Llama-3-8B-Instruct-GGUF");
MODEL_MAP.put("llama3-8b", "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf");
MODEL_MAP.put("llama3-8b-instruct", "QuantFactory/Meta-Llama-3-8B-Instruct-GGUF-v2/Meta-Llama-3-8B-Instruct-v2.Q4_K_M.gguf");
MODEL_MAP.put("llama3-8b-abliterated", "failspy/Llama-3-8B-Instruct-abliterated-GGUF/Llama-3-8B-Instruct-abliterated-q4_k.gguf");
MODEL_MAP.put("llama3-8b-irene", "mradermacher/Llama-3-8B-Irene-v0.2-i1-GGUF/Llama-3-8B-Irene-v0.2.i1-Q4_K_M.gguf");
MODEL_MAP.put("llama3-70b", "bartowski/Meta-Llama-3-70B-Instruct-GGUF/Meta-Llama-3-70B-Instruct-Q4_K_M.gguf");
MODEL_MAP.put("llama3-70b-abliterated", "failspy/llama-3-70B-Instruct-abliterated-GGUF/llama-3-70B-Instruct-abliterated-q4_k_m.gguf");
MODEL_MAP.put("llama3-70b-instruct", "QuantFactory/Meta-Llama-3-70B-Instruct-GGUF-v2/Meta-Llama-3-70B-Instruct-v2.Q4_K_M.gguf");
MODEL_MAP.put("llama3-70b-synthia", "bartowski/Llama-3-70B-Synthia-v3.5-GGUF/Llama-3-70B-Synthia-v3.5-Q4_K_M.gguf");
MODEL_MAP.put("llama3-alpha-centauri", "mradermacher/Llama-3-Alpha-Centauri-4x8B-v0.1-i1-GGUF/Llama-3-Alpha-Centauri-4x8B-v0.1.i1-Q4_K_M.gguf");
MODEL_MAP.put("llama3-alpha-ko", "ditioner/Llama-3-Alpha-Ko-8B-Instruct-Q8_0-GGUF/llama-3-alpha-ko-8b-instruct-q8_0.gguf");
MODEL_MAP.put("llama3-arimas", "mradermacher/Llama-3-70b-Arimas-story-RP-V1-i1-GGUF/Llama-3-70b-Arimas-story-RP-V1.i1-Q4_K_M.gguf");
MODEL_MAP.put("llama3-cat", "mradermacher/llama-3-cat-8b-instruct-GGUF");
MODEL_MAP.put("llama3-chatqa", "bartowski/Llama-3-ChatQA-1.5-8B-GGUF/ChatQA-1.5-8B-Q4_K_M.gguf");
MODEL_MAP.put("llama3-chatqa-smashed", "PrunaAI/nvidia-Llama3-ChatQA-1.5-8B-GGUF-smashed/Llama3-ChatQA-1.5-8B.Q4_K_M.gguf");
MODEL_MAP.put("llama3-chatqa-70b-smashed", "PrunaAI/Llama3-ChatQA-1.5-70B-GGUF-smashed/Llama3-ChatQA-1.5-70B.Q4_K_M.gguf");
MODEL_MAP.put("llama3-chatty", "mradermacher/Llama-3-Chatty-2x8B-i1-GGUF/Llama-3-Chatty-2x8B.i1-Q4_K_M.gguf");
MODEL_MAP.put("llama3-daybreak", "crestf411/llama3-daybreak-lumimaid0.1-8b-gguf/llama3-daybreak-lumimaid0.1-8b-q5_k_m.gguf?not-for-all-audiences=true");
MODEL_MAP.put("llama3-english-hinglish", "mradermacher/llama-3-8b-English-to-Hinglish-GGUF/llama-3-8b-English-to-Hinglish.Q4_K_M.gguf");
MODEL_MAP.put("llama3-function-call", "mudler/LocalAI-Llama3-8b-Function-Call-v0.2-GGUF/LocalAI-Llama3-8b-Function-Call-v0.2-q4_k_m.bin");
MODEL_MAP.put("llama3-hindi", "hus960/LLama3-Gaja-Hindi-8B-v0.1-Q4_K_M-GGUF/llama3-gaja-hindi-8b-v0.1.Q4_K_M.gguf");
MODEL_MAP.put("llama3-indotunned", "ukung/Llama3-IndoTunned-GGUF/Llama3-IndoTunned-q4_k_m.gguf");
MODEL_MAP.put("llama3-instruct", "bartowski/Meta-Llama-3-8B-Instruct-GGUF/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf");
MODEL_MAP.put("llama3-instruct-abliterated", "failspy/Meta-Llama-3-8B-Instruct-abliterated-v3-GGUF/Meta-Llama-3-8B-Instruct-abliterated-v3_q6.gguf");
MODEL_MAP.put("llama3-instruct-coder", "bartowski/Llama-3-8B-Instruct-Coder-v2-GGUF/Llama-3-8B-Instruct-Coder-v2-Q4_K_M.gguf");
MODEL_MAP.put("llama3-gradient", "crusoeai/Llama-3-8B-Instruct-Gradient-1048k-GGUF");
MODEL_MAP.put("llama3-gujarati", "ayushrupapara/llama3_gujarati_4bit_gguf/model.gguf");
MODEL_MAP.put("llama3-gradient-4194k", "leafspark/Llama-3-8B-Instruct-Gradient-4194k-GGUF");
MODEL_MAP.put("llama3-gradient-4194k-smashed", "PrunaAI/Llama-3-8B-Instruct-Gradient-4194k-GGUF-smashed/Llama-3-8B-Instruct-Gradient-4194k.Q4_K_M.gguf");
MODEL_MAP.put("llama3-granite", "mradermacher/Llama-3-6B-Granite-v0.1-GGUF/Llama-3-6B-Granite-v0.1.Q3_K_M.gguf");
MODEL_MAP.put("llama3-hercules", "bartowski/Llama-3-Hercules-5.0-8B-GGUF/Llama-3-Hercules-5.0-8B-Q4_K_M.gguf");
MODEL_MAP.put("llama3-lexi-uncensored", "Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF/Lexi-Llama-3-8B-Uncensored_Q4_K_M.gguf");
MODEL_MAP.put("llama3-lexifun-uncensored", "Orenguteng/Llama-3-8B-LexiFun-Uncensored-V1-GGUF/LexiFun-Llama-3-8B-Uncensored-V1_Q4_K_M.gguf");
MODEL_MAP.put("llama3-lumimaid", "NeverSleep/Llama-3-Lumimaid-8B-v0.1-GGUF/Llama-3-Lumimaid-8B-v0.1.q4_k_m.gguf?not-for-all-audiences=true");
MODEL_MAP.put("llama3-lumimaid-smashed", "PrunaAI/Llama-3-Lumimaid-8B-v0.1-GGUF-smashed/Llama-3-Lumimaid-8B-v0.1.Q4_K_M.gguf");
MODEL_MAP.put("llama3-megamed", "mradermacher/Llama-3-MegaMed-8B-Model-Stock-GGUF/Llama-3-MegaMed-8B-Model-Stock.Q4_K_M.gguf");
MODEL_MAP.put("llama3-openhermes", "mradermacher/Llama3-8B-OpenHermes-DPO-GGUF/Llama3-8B-OpenHermes-DPO.Q4_K_M.gguf");
MODEL_MAP.put("llama3-pair", "QuantFactory/pair-preference-model-LLaMA3-8B-GGUF/pair-preference-model-LLaMA3-8B.Q4_K_M.gguf");
MODEL_MAP.put("llama3-refueled", "LoneStriker/Llama-3-Refueled-GGUF/Llama-3-Refueled-Q4_K_M.gguf");
MODEL_MAP.put("llama3-shisa", "keitokei1994/Llama-3-8B-shisa-2x8B-gguf/Llama-3-8B-shisa-2x8B_Q4_K_M.gguf");
MODEL_MAP.put("llama3-shennu", "wwe180/Llama3-15B-ShenNu-v0.1-Q6_K-GGUF/llama3-15b-shennu-v0.1-q6_k.gguf");
MODEL_MAP.put("llama3-snowy", "mradermacher/Llama-3-SnowyRP-8B-V1-B-GGUF/Llama-3-SnowyRP-8B-V1-B.Q4_K_M.gguf");
MODEL_MAP.put("llama3-sqlcoder", "e-palmisano/llama-3-sqlcoder-8b-Q4_K_M-GGUF/llama-3-sqlcoder-8b.Q4_K_M.gguf");
MODEL_MAP.put("llama3-sqlcoder-unsloth", "jurieyel/Llama3-sqlcoder-8b-4bit-GGUF-q4_K_M/Llama3-sqlcoder-8b-4bit-GGUF-q4_K_M-unsloth.Q4_K_M.gguf");
MODEL_MAP.put("llama3-tenyxchat", "mradermacher/Llama3-TenyxChat-70B-i1-GGUF/Llama3-TenyxChat-70B.i1-Q4_K_M.gguf");
MODEL_MAP.put("llama3-unholy", "Undi95/Llama-3-Unholy-8B-GGUF/Llama-3-Unholy-8B.q4_k_m.gguf?not-for-all-audiences=true");
MODEL_MAP.put("llama3-unholy-smashed", "PrunaAI/Llama-3-Unholy-8B-GGUF-smashed/Llama-3-Unholy-8B.Q4_K_M.gguf");
MODEL_MAP.put("llava", "jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf/llava-v1.5-7b-mmproj-f16.gguf");
MODEL_MAP.put("llava-13b", "PsiPi/liuhaotian_llava-v1.5-13b-GGUF/llava-v1.5-13b-Q5_K_M.gguf/mmproj-model-f16.gguf");
MODEL_MAP.put("llava-llama3", "xtuner/llava-llama-3-8b-v1_1-gguf/llava-llama-3-8b-v1_1-int4.gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf");
MODEL_MAP.put("llava-llama3-smashed", "PrunaAI/llava-llama-3-8b-v1_1-GGUF-smashed/llava-llama-3-8b-v1_1.Q4_K_M.gguf");
MODEL_MAP.put("llava-maid", "megaaziib/Llava-Maid-7B-DPO-GGUF/llava-maid-7b-dpo.Q4_K_M.gguf/mmproj-mistral7b-f16.gguf");
MODEL_MAP.put("llava-mm", "mys/ggml_llava-v1.5-7b/ggml-model-q4_k.gguf/mmproj-model-f16.gguf");
MODEL_MAP.put("llava-phi3", "xtuner/llava-phi-3-mini-gguf/llava-phi-3-mini-f16.gguf/llava-phi-3-mini-mmproj-f16.gguf");
MODEL_MAP.put("llava16-34b", "cjpais/llava-v1.6-34B-gguf/mmproj-model-f16.gguf");
MODEL_MAP.put("llava16-mistral", "cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q4_K_M.gguf");
MODEL_MAP.put("llava16-mistral-7b", "mradermacher/llava-v1.6-mistral-7b-GGUF");
MODEL_MAP.put("llava16-vicuna-7b", "cjpais/llava-v1.6-vicuna-7b-gguf/mmproj-model-f16.gguf");
MODEL_MAP.put("llava16-vicuna-13b", "cjpais/llava-v1.6-vicuna-13b-gguf/mmproj-model-f16.gguf");
MODEL_MAP.put("luna", "PrunaAI/Luna-8B-Instruct-262k-GGUF-smashed/Luna-8B-Instruct-262k.Q4_K_M.gguf");
MODEL_MAP.put("lynn", "PrunaAI/lynn-7b-alpha-GGUF-smashed/lynn-7b-alpha.Q4_K_M.gguf");
MODEL_MAP.put("magicoder", "TheBloke/Magicoder-S-DS-6.7B-GGUF");
MODEL_MAP.put("magicprompt-stable-diffusion", "duyntnet/MagicPrompt-Stable-Diffusion-imatrix-GGUF/MagicPrompt-Stable-Diffusion-Q4_K_M.gguf");
MODEL_MAP.put("maid-yuzu", "mradermacher/maid-yuzu-v8-GGUF");
MODEL_MAP.put("mamba", "bartowski/mamba-2.8b-hf-GGUF/mamba-2.8b-hf-Q4_K_M.gguf");
MODEL_MAP.put("med42", "TheBloke/med42-70B-GGUF");
MODEL_MAP.put("medalpaca", "TheBloke/medalpaca-13B-GGUF");
MODEL_MAP.put("medichat", "QuantFactory/Medichat-Llama3-8B-GGUF/Medichat-Llama3-8B.Q4_K_M.gguf");
MODEL_MAP.put("medicine", "TheBloke/medicine-LLM-GGUF");
MODEL_MAP.put("medicine-13b", "TheBloke/medicine-LLM-13B-GGUF");
MODEL_MAP.put("medicine-chat", "TheBloke/medicine-chat-GGUF");
MODEL_MAP.put("meditron", "TheBloke/meditron-7B-GGUF");
MODEL_MAP.put("meditron-7b", "chrohi/meditron-7b-Q8_0-GGUF/meditron-7b-q8_0.gguf");
MODEL_MAP.put("meditron-70b", "TheBloke/meditron-70B-GGUF");
MODEL_MAP.put("meditron-chat", "TheBloke/meditron-7B-chat-GGUF");
MODEL_MAP.put("medllama2", "garcianacho/MedLlama-2-7B-GGUF/MedLlama-2-7B.q5_K_M.gguf");
MODEL_MAP.put("medllama3", "bartowski/JSL-MedLlama-3-8B-v2.0-GGUF/JSL-MedLlama-3-8B-v2.0-Q4_K_M.gguf");
MODEL_MAP.put("medllama3-20", "JL42/medllama3-v20-GGUF");
MODEL_MAP.put("megamix", "Sao10K/MegaMix-L2-13B-GGUF/Megamix-T1-13B.q5_K_M.gguf");
MODEL_MAP.put("megatron", "mradermacher/megatron_v3_2x7B-GGUF/megatron_v3_2x7B.Q4_K_M.gguf");
MODEL_MAP.put("megatron-moe", "mradermacher/megatron_2.1_MoE_2x7B-GGUF/megatron_2.1_MoE_2x7B.Q4_K_M.gguf");
MODEL_MAP.put("midnight", "mradermacher/Midnight-Miqu-70B-v1.5-i1-GGUF/Midnight-Miqu-70B-v1.5.i1-Q4_K_M.gguf");
MODEL_MAP.put("minicpm-llama3", "openbmb/MiniCPM-Llama3-V-2_5-gguf/ggml-model-Q4_K_M.gguf");
MODEL_MAP.put("mistral", "TheBloke/Mistral-7B-Instruct-v0.2-GGUF");
MODEL_MAP.put("mistral-7b", "pere/mistral-7b-reference100k-Q8_0-GGUF/mistral-7b-reference100k.Q8_0.gguf");
MODEL_MAP.put("mistral-7b-aya", "MaziyarPanahi/Mistral-7B-Instruct-Aya-101-GGUF/Mistral-7B-Instruct-Aya-101.Q5_K_M.gguf");
MODEL_MAP.put("mistral-7b-instruct", "MaziyarPanahi/Mistral-7B-Instruct-v0.3-GGUF/Mistral-7B-Instruct-v0.3.Q4_K_M.gguf");
MODEL_MAP.put("mistral-7b-instruct-imatrix", "hermes42/Mistral-7B-Instruct-v0.3-imatrix-GGUF/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf");
MODEL_MAP.put("mistral-7b-instruct-sota", "CISCai/Mistral-7B-Instruct-v0.3-SOTA-GGUF/Mistral-7B-Instruct-v0.3.IQ3_M.gguf");
MODEL_MAP.put("mistral-charttotext", "moetezsa/mistral_charttotext_gguf/mistral_charttotext_gguf-unsloth.Q4_K_M.gguf");
MODEL_MAP.put("mistral-hindi", "IndicRAGware/mistral-hindi-RAG-7b-GGUF/mistral-hindi-rag-7b.fp16.gguf");
MODEL_MAP.put("mistral-holodeck", "KoboldAI/Mistral-7B-Holodeck-1-GGUF/mistral7b-holodeck-v1.Q4_K_M.gguf");
MODEL_MAP.put("mistral-instruct", "TheBloke/Mistral-7B-Instruct-v0.1-GGUF");
MODEL_MAP.put("mistral-instruct-7b", "second-state/Mistral-7B-Instruct-v0.2-GGUF/Mistral-7B-Instruct-v0.2-Q5_K_M.gguf");
MODEL_MAP.put("mistral-neuraldpo", "mradermacher/Mistral-NeuralDPO-GGUF/Mistral-NeuralDPO.Q4_K_M.gguf");
MODEL_MAP.put("mistral-openorca", "TheBloke/Mistral-7B-OpenOrca-GGUF");
MODEL_MAP.put("mistral-orthogonalized", "PrunaAI/mistral-orthogonalized-GGUF-smashed/mistral-orthogonalized.Q4_K_M.gguf");
MODEL_MAP.put("mistral-rag", "FilippoToso/Mistral-RAG-Q8_0-GGUF/mistral-rag.Q8_0.gguf");
MODEL_MAP.put("mistrallite", "TheBloke/MistralLite-7B-GGUF/mistrallite.Q4_K_M.gguf");
MODEL_MAP.put("mixtral", "TheBloke/Mixtral-8x7B-v0.1-GGUF");
MODEL_MAP.put("mixtral-cybermind", "mradermacher/Mixtral_AI_Cyber_MegaMind_3_0-GGUF/Mixtral_AI_Cyber_MegaMind_3_0.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("mixtral-erotic", "mradermacher/Mixtral_Erotic_13Bx2_MOE_22B-GGUF/Mixtral_Erotic_13Bx2_MOE_22B.Q4_K_M.gguf");
MODEL_MAP.put("mixtral-holodeck", "KoboldAI/Mixtral-8x7B-Holodeck-v1-GGUF/Mixtral-8x7B-holodeck-v1.Q4_K_M.gguf");
MODEL_MAP.put("mixtral-instruct", "matteocavestri/Mixtral-8x7B-Instruct-v0.1-Q4_K_M-GGUF/mixtral-8x7b-instruct-v0.1-q4_k_m.gguf");
MODEL_MAP.put("mixtral-megamerge", "mradermacher/mixtral-megamerge-dare-8x7b-v2-GGUF");
MODEL_MAP.put("mixtral-7bx5", "LoneStriker/Mixtral_7Bx5_MoE_30B-GGUF/Mixtral_7Bx5_MoE_30B-Q4_K_M.gguf");
MODEL_MAP.put("mobilevlm", "Blombert/MobileVLM-3B-GGUF/mobilevlm-3b.Q5_K_M.gguf/mmproj-model-f16.gguf");
MODEL_MAP.put("moistral", "TheDrummer/Moistral-11B-v4-GGUF/Moistral-11B-v4-Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("moondream2", "vikhyatk/moondream2/moondream2-text-model-f16.gguf/moondream2-mmproj-f16.gguf");
MODEL_MAP.put("ms-wizardlm", "newsletter/microsoft_WizardLM-2-7B-Q6_K-GGUF/microsoft_wizardlm-2-7b.Q6_K.gguf");
MODEL_MAP.put("mythalion", "TheBloke/Mythalion-13B-GGUF");
MODEL_MAP.put("mumath-code", "mradermacher/MuMath-Code-L-7B-GGUF/MuMath-Code-L-7B.Q4_K_M.gguf");
MODEL_MAP.put("mumath-code-34b", "mradermacher/MuMath-Code-CL-34B-GGUF/MuMath-Code-CL-34B.Q4_K_M.gguf");
MODEL_MAP.put("mythomist", "TheBloke/MythoMist-7B-GGUF");
MODEL_MAP.put("natural-sql", "chatdb/natural-sql-7b-GGUF");
MODEL_MAP.put("neural-chat", "TheBloke/neural-chat-7B-v3-3-GGUF");
MODEL_MAP.put("nexusraven", "TheBloke/NexusRaven-V2-13B-GGUF");
MODEL_MAP.put("noromaid", "TheBloke/Noromaid-20B-v0.1.1-GGUF");
MODEL_MAP.put("notaires", "mradermacher/Notaires_Yi-1.5-9B-Chat-GGUF/Notaires_Yi-1.5-9B-Chat.Q8_0.gguf");
MODEL_MAP.put("notus", "TheBloke/notus-7B-v1-GGUF");
MODEL_MAP.put("notux", "TheBloke/notux-8x7b-v1-GGUF");
MODEL_MAP.put("nous-capybara", "TheBloke/Nous-Capybara-34B-GGUF");
MODEL_MAP.put("nous-hermes", "TheBloke/Nous-Hermes-13B-GGUF/Nous-Hermes-13B.Q4_K_M.gguf");
MODEL_MAP.put("nous-hermes-llama2", "TheBloke/Nous-Hermes-Llama2-70B-GGUF");
MODEL_MAP.put("nous-hermes2", "TheBloke/Nous-Hermes-2-Yi-34B-GGUF");
MODEL_MAP.put("nous-hermes2-llama3", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf");
MODEL_MAP.put("nous-hermes2-mixtral", "TheBloke/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF");
MODEL_MAP.put("octopus", "NexaAIDev/octopus-v4-gguf/Octopus-v4-Q4_K_S.gguf");
MODEL_MAP.put("openorca-gemma-2b", "LoneStriker/OrcaGemma-2B-GGUF/OrcaGemma-2B-Q4_K_M.gguf");
MODEL_MAP.put("openorca-platypus2", "TheBloke/OpenOrca-Platypus2-13B-GGUF");
MODEL_MAP.put("openorca-zephyr", "TheBloke/OpenOrca-Zephyr-7B-GGUF");
MODEL_MAP.put("openbio-llama3", "mradermacher/OpenBioLLM-Llama3-8B-GGUF/OpenBioLLM-Llama3-8B.Q4_K_M.gguf");
MODEL_MAP.put("openbio-llama3-70b", "mradermacher/OpenBioLLM-Llama3-70B-i1-GGUF/OpenBioLLM-Llama3-70B.i1-Q4_K_M.gguf");
MODEL_MAP.put("openbio-llama3-smashed", "PrunaAI/OpenBioLLM-Llama3-8B-GGUF-smashed/OpenBioLLM-Llama3-8B.Q4_K_M.gguf");
MODEL_MAP.put("openbuddy-llama2", "TheBloke/OpenBuddy-Llama2-13B-v11.1-GGUF");
MODEL_MAP.put("openchat", "TheBloke/openchat_3.5-GGUF");
MODEL_MAP.put("openchat35", "second-state/OpenChat-3.5-0106-GGUF/openchat-3.5-0106-Q5_K_M.gguf");
MODEL_MAP.put("openchat36", "bartowski/openchat-3.6-8b-20240522-GGUF/openchat-3.6-8b-20240522-Q4_K_M.gguf");
MODEL_MAP.put("openhermes", "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF");
MODEL_MAP.put("openllama", "klosax/openllama-3b-v2-gguf/openllama-3b-v2-q4_0.gguf");
MODEL_MAP.put("orca-mini", "TheBloke/orca_mini_v3_7B-GGUF");
MODEL_MAP.put("orca2", "TheBloke/Orca-2-7B-GGUF");
MODEL_MAP.put("orca2-13b", "second-state/Orca-2-13B-GGUF/Orca-2-13b-Q5_K_M.gguf");
MODEL_MAP.put("orthocopter", "Lewdiculous/Orthocopter_8B-GGUF-Imatrix/Orthocopter_8B-Q4_K_M-imat.gguf");
MODEL_MAP.put("pandora", "mradermacher/Pandora_7B-GGUF/Pandora_7B.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("phi-so-serious", "concedo/Phi-SoSerious-Mini-V1-GGUF/PhiSoSerious-Q4_K_M.gguf");
MODEL_MAP.put("phi-so-serious-imatrix", "duyntnet/Phi-SoSerious-Mini-V1-imatrix-GGUF/Phi-SoSerious-Mini-V1-Q4_K_M.gguf");
MODEL_MAP.put("phi-so-serious-mini", "NikolayKozloff/Phi-SoSerious-Mini-V1-Q8_0-Q6_K-Q5_K_M-Q4_0-GGUF/PhiSoSerious-Q5_K_M.gguf");
MODEL_MAP.put("phi2", "TheBloke/phi-2-GGUF");
MODEL_MAP.put("phillama", "cleatherbury/phillama-3.8b-v0.1-Q4_K_M-GGUF/phillama-3.8b-v0.1.Q4_K_M.gguf");
MODEL_MAP.put("phi2-electrical-engineering", "TheBloke/phi-2-electrical-engineering-GGUF");
MODEL_MAP.put("phi2-sql", "nakcnx/phi-2-sql-gguf/phi-2-sql-Q5_K_M.gguf");
MODEL_MAP.put("phi3", "microsoft/Phi-3-mini-4k-instruct-gguf/Phi-3-mini-4k-instruct-q4.gguf");
MODEL_MAP.put("phi3-128k", "MoMonir/Phi-3-mini-128k-instruct-GGUF");
MODEL_MAP.put("phi3-128k-6b", "NikolayKozloff/phi3-128k-6b-Q8_0-GGUF/phi3-128k-6b.Q8_0.gguf");
MODEL_MAP.put("phi3-code-128k", "RDson/Phi-3-mini-code-finetune-128k-instruct-v1-GGUF/Code-Phi-3-mini-128k-instruct-GGUF-Q4_K_M.gguf");
MODEL_MAP.put("phi3-medium", "bartowski/Phi-3-medium-4k-instruct-GGUF/Phi-3-medium-4k-instruct-Q4_K_M.gguf");
MODEL_MAP.put("phi3-medium-128k", "bartowski/Phi-3-medium-128k-instruct-GGUF/Phi-3-medium-128k-instruct-Q4_K_M.gguf");
MODEL_MAP.put("phi3-medium-4k", "nisten/phi3-medium-4k-gguf/13phi4kq5km.gguf");
MODEL_MAP.put("phi3-medium-4k-instruct", "second-state/Phi-3-medium-4k-instruct-GGUF/Phi-3-medium-4k-instruct-Q4_K_M.gguf");
MODEL_MAP.put("phi3-medium-4k-instruct-abliterated", "failspy/Phi-3-medium-4k-instruct-abliterated-v3-GGUF/Phi-3-medium-4k-instruct-abliterated-v3_q4.gguf");
MODEL_MAP.put("phi3-mini-4k", "ALI-B/phi3-mini-4k-gguf/phi3-mini-4k-gguf-unsloth.Q4_K_M.gguf");
MODEL_MAP.put("phi3-mini-4k-instruct", "second-state/Phi-3-mini-4k-instruct-GGUF/Phi-3-mini-4k-instruct-Q4_K_M.gguf");
MODEL_MAP.put("phi3-mini-128k-instruct", "second-state/Phi-3-mini-128k-instruct-GGUF/Phi-3-mini-128k-instruct-Q4_K_M.gguf");
MODEL_MAP.put("phi3-mini-128k-instruct-imatrix", "PrunaAI/Phi-3-mini-128k-instruct-GGUF-Imatrix-smashed/Phi-3-mini-128k-instruct.Q4_K_M.gguf");
MODEL_MAP.put("phi3-mini-math", "jrc/phi3-mini-math-Q4_K_M-GGUF/phi3-mini-math.Q4_K_M.gguf");
MODEL_MAP.put("phind-codellama", "TheBloke/Phind-CodeLlama-34B-v2-GGUF");
MODEL_MAP.put("photolens-medllama2", "s3nh/Photolens-MedLLaMa-2-Chat-7b-GGUF/Photolens-MedLLaMa-2-Chat-7b.Q4_K_M.gguf");
MODEL_MAP.put("pipai-sql", "ukung/PipAI-SQL-1.3b-GGUF/PipAI-SQL-1.3b-q4_k_m.gguf");
MODEL_MAP.put("poppy-porpoise", "Lewdiculous/Poppy_Porpoise-0.72-L3-8B-GGUF-IQ-Imatrix/Poppy_Porpoise-0.72-L3-8B-Q4_K_M-imat.gguf");
MODEL_MAP.put("poro", "PrunaAI/Poro-34B-GGUF-smashed/Poro-34B.Q4_K_M.gguf");
MODEL_MAP.put("power-llama3", "mradermacher/Power-Llama-3-7B-Instruct-GGUF/Power-Llama-3-7B-Instruct.Q4_K_M.gguf");
MODEL_MAP.put("power-llama3-13b", "mradermacher/Power-Llama-3-13b-GGUF/Power-Llama-3-13b.Q4_K_M.gguf");
MODEL_MAP.put("psymedrp", "TheBloke/PsyMedRP-v1-20B-GGUF?not-for-all-audiences=true");
MODEL_MAP.put("question-builder", "SicariusSicariiStuff/Question_Builder_GGUF/Question_Builder-Q4_K_M.gguf");
MODEL_MAP.put("qwen", "LoneStriker/Qwen1.5-8x7b-GGUF/Qwen1.5-8x7b-Q4_K_M.gguf");
MODEL_MAP.put("qwen-0.5b-chat", "second-state/Qwen1.5-0.5B-Chat-GGUF/Qwen1.5-0.5B-Chat-Q4_K_M.gguf");
MODEL_MAP.put("qwen-1.8b-chat", "second-state/Qwen1.5-1.8B-Chat-GGUF/Qwen1.5-1.8B-Chat-Q4_K_M.gguf");
MODEL_MAP.put("qwen-4b-chat", "second-state/Qwen1.5-4B-Chat-GGUF/Qwen1.5-4B-Chat-Q4_K_M.gguf");
MODEL_MAP.put("qwen-7b-chat", "second-state/Qwen1.5-7B-Chat-GGUF/Qwen1.5-7B-Chat-Q4_K_M.gguf");
MODEL_MAP.put("qwen-14b-chat", "second-state/Qwen1.5-14B-Chat-GGUF/Qwen1.5-14B-Chat-Q4_K_M.gguf");
MODEL_MAP.put("qwen-72b-chat", "second-state/Liberated-Qwen1.5-72B-GGUF/Liberated-Qwen1.5-72B-Q4_K_M.gguf");
MODEL_MAP.put("qwen-smashed", "PrunaAI/Qwen1.5-MoE-A2.7B-Chat-GGUF-smashed/Qwen1.5-MoE-A2.7B-Chat.Q4_K_M.gguf");
MODEL_MAP.put("qwen-stable-diffusion", "hahahafofo/Qwen-1_8B-Stable-Diffusion-Prompt-GGUF/ggml-model-q4_0.gguf");
MODEL_MAP.put("rakuten", "RichardErkhov/Rakuten_-_RakutenAI-7B-instruct-gguf/RakutenAI-7B-instruct.Q4_K_M.gguf");
MODEL_MAP.put("rhea", "mradermacher/Rhea-72b-v0.5-i1-GGUF/Rhea-72b-v0.5.i1-Q4_K_M.gguf");
MODEL_MAP.put("rocket", "TheBloke/rocket-3B-GGUF");
MODEL_MAP.put("roleplay-llama3", "mradermacher/Roleplay-Llama-3-8B-i1-GGUF/Roleplay-Llama-3-8B.i1-Q4_K_M.gguf");
MODEL_MAP.put("romistral", "NikolayKozloff/RoMistral-7b-Instruct-Q8_0-GGUF/romistral-7b-instruct.Q8_0.gguf");
MODEL_MAP.put("samantha-mistral", "TheBloke/samantha-mistral-7B-GGUF");
MODEL_MAP.put("samantha-wizardlm2", "Guilherme34/Samantha-wizardlm2-GGUF/Samantha-wizardlm2.gguf");
MODEL_MAP.put("sdxl-lightning", "OlegSkutte/SDXL-Lightning-GGUF/sdxl_lightning_4step.q8_0.gguf");
MODEL_MAP.put("sdxl-turbo", "OlegSkutte/sdxl-turbo-GGUF/sd_xl_turbo_1.0.q8_0.gguf");
MODEL_MAP.put("sensualize-mistral", "TheBloke/Sensualize-Mixtral-GGUF");
MODEL_MAP.put("sensualize-solar", "TheBloke/Sensualize-Solar-10.7B-GGUF");
MODEL_MAP.put("sfr-llama3", "bartowski/LLaMA3-iterative-DPO-final-GGUF/LLaMA3-iterative-DPO-final-Q4_K_M.gguf");
MODEL_MAP.put("slim-ner", "llmware/slim-ner-tool/slim-ner.gguf");
MODEL_MAP.put("slim-sentiment", "llmware/slim-sentiment-tool/slim-sentiment.gguf");
MODEL_MAP.put("slim-sql", "llmware/slim-sql-tool/slim-sql.gguf");
MODEL_MAP.put("smaug-llama3", "bartowski/Smaug-Llama-3-70B-Instruct-GGUF/Smaug-Llama-3-70B-Instruct-Q4_K_M.gguf");
MODEL_MAP.put("smaug-llama3-abliterated", "mradermacher/Smaug-Llama-3-70B-Instruct-abliterated-v3-i1-GGUF/Smaug-Llama-3-70B-Instruct-abliterated-v3.i1-Q4_K_M.gguf");
MODEL_MAP.put("snowflake-arctic", "ChristianAzinn/snowflake-arctic-embed-l-gguf/snowflake-arctic-embed-l--Q4_K_M.GGUF");
MODEL_MAP.put("solar", "second-state/SOLAR-10.7B-Instruct-v1.0-GGUF/SOLAR-10.7B-Instruct-v1.0-Q5_K_M.gguf");
MODEL_MAP.put("solar-uncensored", "TheBloke/SOLAR-10.7B-Instruct-v1.0-uncensored-GGUF");
MODEL_MAP.put("snowlotus", "s3nh/SnowLotus-v2-10.7B-GGUF");
MODEL_MAP.put("snowman", "TheBloke/You_can_cry_Snowman-13B-GGUF");
MODEL_MAP.put("speechless-mistral", "bartowski/speechless-instruct-mistral-7b-v0.2-GGUF/speechless-instruct-mistral-7b-v0.2-Q4_K_M.gguf");
MODEL_MAP.put("sqlcoder", "TheBloke/sqlcoder-GGUF");
MODEL_MAP.put("sqlcoder-7b", "TheBloke/sqlcoder-7B-GGUF");
MODEL_MAP.put("sqlcoder-34b", "TheBloke/sqlcoder-34b-alpha-GGUF");
MODEL_MAP.put("sqlcoder-mistral", "MaziyarPanahi/sqlcoder-7b-Mistral-7B-Instruct-v0.2-slerp-GGUF/sqlcoder-7b-Mistral-7B-Instruct-v0.2-slerp.Q4_K_M.gguf");
MODEL_MAP.put("stable-beluga", "TheBloke/StableBeluga-7B-GGUF");
MODEL_MAP.put("stable-beluga-13b", "TheBloke/StableBeluga-13B-GGUF");
MODEL_MAP.put("stable-beluga2-70b", "TheBloke/StableBeluga2-70B-GGUF/stablebeluga2-70B.Q4_K_M.gguf");
MODEL_MAP.put("stable-code", "TheBloke/stable-code-3b-GGUF");
MODEL_MAP.put("stable-diffusion", "jiaowobaba02/stable-diffusion-v2-1-GGUF/stable_diffusion-ema-pruned-v2-1_768.q8_0.gguf");
MODEL_MAP.put("stable-diffusion-pruned", "ozbillwang/stable-diffusion-2-1_768-ema-pruned.gguf/stable-diffusion-2-1_768-ema-pruned.gguf");
MODEL_MAP.put("stablelm-zephyr", "TheBloke/stablelm-zephyr-3b-GGUF");
MODEL_MAP.put("stablelm2", "second-state/stablelm-2-zephyr-1.6b-GGUF/stablelm-2-zephyr-1_6b-Q5_K_M.gguf");
MODEL_MAP.put("stablelm2-smashed", "PrunaAI/stablelm-2-12b-GGUF-smashed/stablelm-2-12b.Q4_K_M.gguf");
MODEL_MAP.put("starcoder", "TheBloke/starcoder-GGML/starcoder.ggmlv3.q4_0.bin");
MODEL_MAP.put("starcoder2", "Mooizz/starcoder2-gguf/starcoder2-7b-q4_k_m.gguf");
MODEL_MAP.put("starcoder2-instruct", "bartowski/starcoder2-15b-instruct-v0.1-GGUF/starcoder2-15b-instruct-v0.1-Q4_K_M.gguf");
MODEL_MAP.put("starcoder2-instruct-smashed", "PrunaAI/starcoder2-15b-instruct-v0.1-GGUF-smashed/starcoder2-15b-instruct-v0.1.Q4_K_M.gguf");
MODEL_MAP.put("starling", "LoneStriker/Starling-LM-7B-beta-GGUF/Starling-LM-7B-beta-Q4_K_M.gguf");
MODEL_MAP.put("stheno-imatrix", "Lewdiculous/L3-8B-Stheno-v3.1-GGUF-IQ-Imatrix/L3-8B-Stheno-v3.1-Q4_K_M-imat.gguf");
MODEL_MAP.put("stheno-mega", "Sao10K/Stheno-Mega-False-49B-L2-GGUF/Stheno-Mega-False-49B-L2.q5_K_M.gguf");
MODEL_MAP.put("synthia", "TheBloke/Synthia-70B-GGUF");
MODEL_MAP.put("t3q-inex", "MaziyarPanahi/T3qInex12-7B-GGUF/T3qInex12-7B.Q4_K_M.gguf");
MODEL_MAP.put("tinydolphin", "s3nh/TinyDolphin-2.8-1.1b-GGUF");
MODEL_MAP.put("tinyllama", "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF");
MODEL_MAP.put("tinyllama-1b", "newsletter/TinyLlama-1.1B-Chat-v1.0-Q6_K-GGUF/tinyllama-1.1b-chat-v1.0.Q6_K.gguf");
MODEL_MAP.put("tinyllama-openorca", "TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF");
MODEL_MAP.put("tinyllama-chat", "second-state/TinyLlama-1.1B-Chat-v1.0-GGUF/TinyLlama-1.1B-Chat-v1.0-Q5_K_M.gguf");
MODEL_MAP.put("tinyllama-indo", "ukung/TinyLlama-1.1B-indo-v1-GGUF/TinyLlama-1.1B-indo-v1-q4_k_m.gguf");
MODEL_MAP.put("tinyllama-nerd", "ar08/tinyllama-nerd-gguf/tinyllama-nerd-gguf-unsloth.Q4_K_M.gguf");
MODEL_MAP.put("tinyllama-pythonskill", "ukung/TinyLlama-1.1B-PythonSkill-GGUF/TinyLlama-1.1B-PythonSkill-q4_k_m.gguf");
MODEL_MAP.put("tinymistral", "ukung/TinyMistral-6x248M-Instruct-GGUF/TinyMistral-6x248M-Instruct-q4_k_m.gguf");
MODEL_MAP.put("tinymistral-chat", "ukung/TinyMistral-248M-Chat-v2-GGUF/TinyMistral-248M-Chat-v2-q4_k_m.gguf");
MODEL_MAP.put("tinymixtral", "ukung/TinyMixtral-32x248M-GGUF/TinyMixtral-32x248M-q4_k_m.gguf");
MODEL_MAP.put("tinyrag", "TroyDoesAI/Tiny-RAG-gguf/Tiny-RAG.gguf");
MODEL_MAP.put("tinyvicuna", "afrideva/Tiny-Vicuna-1B-GGUF/tiny-vicuna-1b.q8_0.gguf");
MODEL_MAP.put("toppy", "TheBloke/Toppy-M-7B-GGUF/toppy-m-7b.Q4_K_M.gguf?not-for-all-audiences=true");
MODEL_MAP.put("upstage-llama2", "TheBloke/Upstage-Llama-2-70B-instruct-v2-GGUF");
MODEL_MAP.put("vera", "EagleConsortium/Vera-7B-GGUF/Vera-Q4_K_M.gguf");
MODEL_MAP.put("vicuna", "TheBloke/vicuna-13B-v1.5-GGUF");
MODEL_MAP.put("white-rabbit", "TheBloke/WhiteRabbitNeo-13B-GGUF");
MODEL_MAP.put("whisper-large", "OllmOne/whisper-large-v3-GGUF/model-q4k.gguf");
MODEL_MAP.put("whisper-medium", "OllmOne/whisper-medium-GGUF/model-q4k.gguf");
MODEL_MAP.put("wizard-vicuna-uncensored", "TheBloke/Wizard-Vicuna-7B-Uncensored-GGUF/Wizard-Vicuna-7B-Uncensored.Q4_K_M.gguf");
MODEL_MAP.put("wizard-vicuna-uncensored-13b", "TheBloke/Wizard-Vicuna-13B-Uncensored-GGUF/Wizard-Vicuna-13B-Uncensored.Q4_K_M.gguf");
MODEL_MAP.put("wizard-vicuna-uncensored-30b", "TheBloke/Wizard-Vicuna-30B-Uncensored-GGUF/Wizard-Vicuna-30B-Uncensored.Q4_K_M.gguf");
MODEL_MAP.put("wizardlm", "TheBloke/WizardLM-13B-V1.2-GGUF");
MODEL_MAP.put("wizardlm2", "mradermacher/WizardLM-2-4x7B-MoE-GGUF/WizardLM-2-4x7B-MoE.Q4_K_M.gguf");
MODEL_MAP.put("wizardlm-7b", "NikolayKozloff/WizardLM-2-7B-abliterated-Q4_0-GGUF/wizardlm-2-7b-abliterated-q4_0.gguf");
MODEL_MAP.put("wizardlm-70b", "TheBloke/WizardLM-70B-V1.0-GGUF");
MODEL_MAP.put("wizardlm-uncensored", "TheBloke/WizardLM-7B-V1.0-Uncensored-GGUF");
MODEL_MAP.put("wizardlm-uncensored-13b", "TheBloke/WizardLM-13B-V1.0-Uncensored-GGUF");
MODEL_MAP.put("wizardlm-uncensored-33b", "TheBloke/WizardLM-33B-V1.0-Uncensored-GGUF");
MODEL_MAP.put("wizardcoder", "TheBloke/WizardCoder-33B-V1.1-GGUF");
MODEL_MAP.put("wizardcoder-python", "TheBloke/WizardCoder-Python-7B-V1.0-GGUF");
MODEL_MAP.put("wizardcoder-python-13b", "TheBloke/WizardCoder-Python-13B-V1.0-GGUF");
MODEL_MAP.put("wizardmath", "TheBloke/WizardMath-7B-V1.0-GGUF");
MODEL_MAP.put("wizardmath-13b", "TheBloke/WizardMath-13B-V1.0-GGUF");
MODEL_MAP.put("wizardmath-70b", "TheBloke/WizardMath-70B-V1.0-GGUF");
MODEL_MAP.put("wizardmega", "TheBloke/wizard-mega-13B-GGUF/wizard-mega-13B.Q4_K_M.gguf");
MODEL_MAP.put("wizardorca", "Aryanne/Wizard-Orca-3B-gguf/q5_1-wizard-orca-3b.gguf");
MODEL_MAP.put("xwincoder", "TheBloke/XwinCoder-13B-GGUF");
MODEL_MAP.put("xwincoder-34b", "TheBloke/XwinCoder-34B-GGUF");
MODEL_MAP.put("xwinlm", "TheBloke/Xwin-LM-13B-V0.1-GGUF");
MODEL_MAP.put("xwinlm-70b", "mradermacher/Xwin-LM-70B-V0.1-i1-GGUF/Xwin-LM-70B-V0.1.i1-Q4_K_M.gguf");
MODEL_MAP.put("yarn-llama2", "TheBloke/Yarn-Llama-2-13B-128K-GGUF");
MODEL_MAP.put("yarn-mistral", "TheBloke/Yarn-Mistral-7B-128k-GGUF");
MODEL_MAP.put("yi", "TheBloke/Yi-6B-200K-GGUF");
MODEL_MAP.put("yi-coder", "mradermacher/Yi-1.5-9B-coder-GGUF/Yi-1.5-9B-coder.Q4_K_M.gguf");
MODEL_MAP.put("yi-vl", "killgfat/Yi-VL-6B-GGUF");
MODEL_MAP.put("yi-llamafied", "TheBloke/Yi-34B-200K-Llamafied-GGUF");
MODEL_MAP.put("yi-34b", "TheBloke/Yi-34B-Chat-GGUF");
MODEL_MAP.put("yi-34b-200k", "TheBloke/Yi-34B-200K-GGUF");
MODEL_MAP.put("yi-34b-200k-megamerge", "TheBloke/Yi-34B-200K-DARE-megamerge-v8-GGUF");
MODEL_MAP.put("yi-34b-chat", "bartowski/Yi-1.5-34B-Chat-GGUF/Yi-1.5-34B-Chat-Q4_K_M.gguf");
MODEL_MAP.put("yi-34b-moe", "second-state/Yi-34Bx2-MoE-60B-GGUF/Yi-34Bx2-MoE-60B-Q5_K_M.gguf");
MODEL_MAP.put("zephyr", "TheBloke/zephyr-7B-beta-GGUF");
MODEL_MAP.put("zephyr-7b", "MaziyarPanahi/zephyr-7b-alpha-sharded-Mistral-7B-Instruct-v0.1-GGUF/zephyr-7b-alpha-sharded-Mistral-7B-Instruct-v0.1.Q4_K_M.gguf");
MODEL_MAP.put("zephyr-cucumber", "nalmeida/Zephyr-Cucumber-Instruct-GGUF/zephyr-cucumber-instruct.gguf.q4_k_m.bin");
MODEL_MAP.put("zephyrus", "Sao10K/Zephyrus-L1-33B-GGUF/Zephyrus-L1-33B.q4_K_M.gguf");
}
// Main entry point into jllm for command line parsing
public static void main(String[] args) {
try {
// Parse command line arguments and call the appropriate function.
if (args.length > 0) {
String commandName = args[0];
boolean helpRequested = args.length > 1 && args[1].equalsIgnoreCase("help");
switch (commandName) {
case "models":
if (!helpRequested) {
boolean needUrl = false;
boolean needSize = false;
String modelWildcard = "";
switch (args.length) {
case 4:
if ((args[1].toLowerCase().startsWith("v") && !isWild(args[1]))
|| (args[2].toLowerCase().startsWith("v") && !isWild(args[2]))
|| (args[3].toLowerCase().startsWith("v") && !isWild(args[3])))
needUrl = true;
if ((args[1].toLowerCase().startsWith("s") && !isWild(args[1]))
|| (args[2].toLowerCase().startsWith("s") && !isWild(args[2]))
|| (args[3].toLowerCase().startsWith("s") && !isWild(args[3])))
needSize = true;
if (isWild(args[1]))
modelWildcard = args[1];
if (isWild(args[2]))
modelWildcard = args[2];
if (isWild(args[3]))
modelWildcard = args[3];
break;
case 3:
if ((args[1].toLowerCase().startsWith("v") && !isWild(args[1]))
|| (args[2].toLowerCase().startsWith("v") && !isWild(args[2])))
needUrl = true;
if ((args[1].toLowerCase().startsWith("s") && !isWild(args[1]))
|| (args[2].toLowerCase().startsWith("s") && !isWild(args[2])))
needSize = true;
if (isWild(args[1]))
modelWildcard = args[1];
if (isWild(args[2]))
modelWildcard = args[2];
break;
case 2:
if (args[1].toLowerCase().startsWith("v") && !isWild(args[1]))
needUrl = true;
else if (args[1].toLowerCase().startsWith("s") && !isWild(args[1]))
needSize = true;
else if (isWild(args[1]))
modelWildcard = args[1];
break;
}
dumpModelMap(needUrl, needSize, modelWildcard);
}
else
commandHelp(commandName, false);
break;
case "ls":
case "dir":
case "show":
case "list":
if (!helpRequested)
listModels();
else
commandHelp(commandName, false);
break;
case "ps":
if (!helpRequested)
listRunningModels();
else
commandHelp(commandName, false);
break;
case "kill":
if (args.length > 1 && !helpRequested)
killModel(args[1]);
else
commandHelp(commandName, !helpRequested);
break;
case "download":
case "pull":
if (args.length > 1 && !helpRequested)
pullModel(args[1]);
else
commandHelp(commandName, !helpRequested);
break;
case "delete":
case "remove":
case "rm":
if (args.length > 1 && !helpRequested)
rmModel(args[1]);
else
commandHelp(commandName, !helpRequested);
break;
case "execute":
case "exec":
case "run":
if (args.length > 1 && !helpRequested) {
String modelName = args[1];
String listenHost = args.length > 2 ? args[2] : "localhost";
int listenPort = args.length > 3 ? getServerPort(args[3]) : nextPort(DEFAULT_PORT);
boolean isVerbose = args.length > 4 && args[4].equals("--verbose");
runModel(modelName, listenHost, listenPort, isVerbose);
} else {
commandHelp(commandName, !helpRequested);
}
break;
case "help":
showHelp();
break;
case "version":
System.out.println("jllm v1.0");
break;
default:
System.out.println("ERROR: Unknown command: " + commandName);
showHelp();
}
} else {
System.out.println("ERROR: No command provided");
showHelp();
}
} catch (Exception e) {
System.out.println("\n" + e.getMessage());
}
}
static void modelHelp() {
System.out.println("Many popular model names can be provided in short form. Eg llama2, tinyllama, etc");
System.out.println("See a list of all known models with short names by running jllm models");
System.out.println("The full model name is provided in format Repo-Owner/Repo-Name/File-Name");
System.out.println("Eg: TheBloke/Llama-2-7B-GGUF/llama-2-7b.Q4_K_M.gguf is a valid model name");
System.out.println(" if the File-Name is in a standard format it can be omitted");
System.out.println("Eg: TheBloke/Llama-2-7B-GGUF is also a valid model name");
}
static void commandHelp(String commandName, boolean insuffArgs) {
if (insuffArgs)
System.out.println("ERROR: Insufficient arguments to " + commandName + " model");
switch (commandName) {
case "ps":
case "ls":
case "dir":
case "show":
case "list":
System.out.println("No parameters needed for " + commandName + " command");
break;
case "models":
System.out.println("No parameters are needed for " + commandName + " command but there are two optional parameters");
System.out.println("We can add parameter 'size' to scrape the model size and 'verbose' to show the model URL");
break;
case "download":
case "pull":
case "delete":
case "remove":
case "rm":
System.out.println("The " + commandName + " command needs a single parameter for model name");
modelHelp();
break;
case "kill":
System.out.println("The " + commandName + " command needs a single parameter for the model to kill");
System.out.println("The parameter can match against PID, port, model owner, model repo or model file");
System.out.println("All matching processes will be killed");
modelHelp();
break;
case "execute":
case "exec":
case "run":
System.out.println("The " + commandName + " command needs at least 1 parameter for model name");
System.out.println("The 1st param is the model name and is mandatory. It can be a known model as listed in the 'models' command");
System.out.println("Here are some examples of valid options for this parameter:");
System.out.println("'mistrallite' - use any of the pre-defined model as shown by the 'models' command");
System.out.println("'TheBloke/sqlcoder-GGUF' - get model from HuggingFace repo 'sqlcoder-GGUF' owned by 'TheBloke' and get the default quantization");
System.out.println("'TheBloke/sqlcoder-GGUF/sqlcoder.Q4_0.gguf' - same as above but instead of the default quantization, specify the exact filename");
System.out.println("'c:/models/ggml-model-q4_0.gguf' - use a local model file from the filesystem");
System.out.println("'https://intranet.com/Fimbulvetr-11B-v2-Test-14.q4_K_M.gguf' - use a local model file downloaded via http(s)");
System.out.println("The 2nd param is the optional listener host and it defaults to localhost");
System.out.println("The 3rd param is the optional listener port and it defaults to next open port after 8080");
System.out.println("The 4th param is the optional verbosity to show llamafile output and it defaults to false");
modelHelp();
break;
default:
System.out.println("ERROR: Unknown command: " + commandName);
showHelp();
}
}
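// A sketch of how the positional 'run' arguments described above map onto runModel(), assuming
// 'tinyllama' is one of the pre-defined short names shown by the 'models' command:
//   jllm run tinyllama                          -> runModel("tinyllama", "localhost", nextPort(DEFAULT_PORT), false)
//   jllm run tinyllama 0.0.0.0 9090 --verbose   -> runModel("tinyllama", "0.0.0.0", nextPort(9090), true)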
static void showHelp() {
System.out.println("\nUsage:");
System.out.println(" jllm [command]");
System.out.println("\nAvailable Commands:");
System.out.println(" run Run a model");
System.out.println(" pull Pull a model from HuggingFace");
System.out.println(" list List all downloaded models");
System.out.println(" ps Show all running models");
System.out.println(" kill Kill a running model");
System.out.println(" rm Remove a model");
System.out.println(" models Show all well-known models");
System.out.println(" help Help about any command");
System.out.println(" version Show version information");
System.out.println("\nUse \"jllm [command] help\" for more information about a command\n");
}
/**
* Pulls a model file from a repository and saves it locally.
*
* @param modelName The name of the model to be pulled.
* @return The local path of the pulled model file.
* @throws IOException If an I/O error occurs during the process.
*/
static File pullModel(String modelName) throws IOException {
String jllmDir = getJllmDir();
LlmModel llmModel = new LlmModel(modelName);
System.out.printf("Pulling file %s from %s/%s\n", llmModel.modelFile, llmModel.modelOwner, llmModel.modelRepo);
File modelPath = downloadModel(llmModel, jllmDir);
System.out.printf("Pulled to %s\n", modelPath);
return modelPath;
}
/**
* Runs the specified model. If the model is not already present, it downloads the model.
*
* @param modelName The name of the model to be run (in either short or long format).
* @param listenHost The host where the model should listen.
* @param listenPort The port where the model should listen.
* @param isVerbose A boolean flag indicating whether verbose output should be produced.
* @throws IOException If an I/O error occurs during the process.
*/
static void runModel(String modelName, String listenHost, int listenPort, boolean isVerbose) throws IOException {
// Start running the specified model. Download model if not already present.
LlmModel llmModel = new LlmModel(modelName);
String modelFile = pathFromModel(llmModel);
if (llmModel.modelOwner.equals("local-owner") && llmModel.modelRepo.equals("file")) {
modelFile = llmModel.modelFile;
} else if (llmModel.modelOwner.equals("local-owner") && llmModel.modelRepo.equals("url")) {
if (modelFile.equals(""))
modelFile = pullModel(modelName).toString();
} else {
if (modelFile.equals(""))
modelFile = pullModel(modelName).toString();
}
ModelLauncher.launchModel(modelFile, llmModel.getMultiModalFile(), listenHost, listenPort, isVerbose);
System.out.printf("Running %s at %s:%s\n", modelFile, listenHost, listenPort);
}
/**
* Checks if the input string is an integer or not.
*
* @param inString The string to be checked.
* @return true if the string is an integer, false otherwise.
*/
static boolean isInteger(String inString) {
try {
Integer.parseInt(inString);
return true;
} catch (NumberFormatException e) {
return false;
}
}
/**
* Determines the server listen port. If the input argument is an integer, it is used as the starting port number;
* otherwise the default starting port is used. The next available/open port from that starting point is returned.
*
* @param portArg The input argument to be parsed to a port number.
* @return The next available port number starting from the chosen port number.
*/
static int getServerPort(String portArg) {
int listenPort;
if (isInteger(portArg))
listenPort = Integer.parseInt(portArg);
else
listenPort = DEFAULT_PORT;
return nextPort(listenPort);
}
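// Illustrative behaviour of getServerPort(), assuming DEFAULT_PORT is 8080 and both ports are free:
//   getServerPort("9090") -> nextPort(9090) -> 9090
//   getServerPort("high") -> nextPort(8080) -> 8080 (a non-numeric argument falls back to the default port)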
/**
* Finds the next available port starting from the given port.
*
* @param startPort The port number from which to start the search.
* @return The next available port number. If no port is available within the range of startPort to startPort + 100, it returns -1.
*/
public static int nextPort(int startPort) {
for (int foundPort = startPort; foundPort <= startPort + 100; foundPort++) {
if (portAvailable(foundPort)) {
return foundPort;
}
}
return -1; // No available port found
}
/**
* Checks if a specific port is available to listen on.
*
* @param listenPort The port number to check.
* @return true if the port is available, false otherwise.
*/
static boolean portAvailable(int listenPort) {
try (ServerSocket ignored = new ServerSocket(listenPort)) {
return true; // Port is available
} catch (IOException e) {
return false; // Port is not available
}
}
/**
* Terminates running models that match the given criteria.
*
* @param killStr The string used to identify the models to be terminated. This can be a PID, port number, model file name, model owner, model repository, or short name.
*/
public static void killModel(String killStr) {
int killIt = 0;
if (isInteger(killStr))
killIt = Integer.parseInt(killStr);
else
killStr = killStr.toLowerCase();
int numKilled = 0;
// Kill running models, either filename from repo_id or instances of models from repo_id.
// Return all the cached models currently running via llamafile
List<LlmModel> runningModels = ProcessLister.listProcessesByName(getLlamaExec());
for (LlmModel runningModel : runningModels) {
if (runningModel.runningPid == killIt ||
runningModel.runningPort == killIt ||
runningModel.modelFile.toLowerCase().contains(killStr) ||
runningModel.modelOwner.toLowerCase().contains(killStr) ||
runningModel.modelRepo.toLowerCase().contains(killStr) ||
runningModel.getShortName().contains(killStr)) {
System.out.printf("Killing pid %s for %s/%s on port %s\n", runningModel.runningPid, runningModel.modelOwner, runningModel.modelRepo, runningModel.runningPort);
ProcessHandle.of(runningModel.runningPid).ifPresent(ProcessHandle::destroy);
numKilled++;
}
}
if (numKilled == 0)
System.out.println("WARNING: No such processes found/killed");
else
System.out.println("Killed " + numKilled + " processes");
}
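// Hypothetical examples of the matching rules in killModel() above (results depend on what is running):
//   killModel("8080") kills any llamafile process listening on port 8080 (or with PID 8080)
//   killModel("phi")  kills any process whose model file, owner, repo or short name contains "phi"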
/**
* Lists all the models currently running via llamafile.
* <p>
* This method fetches all the running models and prints their details including PID, model repository, model owner,
* and the host and port on which they are running. If no models are currently running, it prints a message indicating the same.
*/
public static void listRunningModels() {
// Return all the cached models currently running via llamafile
List<LlmModel> runningModels = ProcessLister.listProcessesByName(getLlamaExec());
if (!runningModels.isEmpty()) {
System.out.printf("Models in %s currently running...\n", getJllmDir());
for (LlmModel runningModel : runningModels) {
System.out.print("PID " + runningModel.runningPid + " has ");
String shortName = runningModel.getShortName();
if (shortName.equals(""))
System.out.print("file [" + runningModel.modelRepo + "]");
else
System.out.print(shortName + " [" + runningModel.modelRepo + " from " + runningModel.modelOwner + "]");
System.out.println(" running on " + runningModel.runningHost + ":" + runningModel.runningPort);
}
} else {
System.out.println("No models currently running");
}
}
/**
* Searches for a specific model file in a list of repository files.
*
* @param repoFiles The list of repository files to search through.
* @param modelFile The name of the model file to find.
* @return The name of the found model file. If the model file is not found, it returns an empty string.
*/
public static String findModel(List<String> repoFiles, String modelFile) {
for (String currFile : repoFiles) {
if (currFile.endsWith(modelFile)) {
return currFile;
}
}
return "";
}
/**
* Checks if the input string contains wildcard characters.
*
* @param wildPattern The input string to be checked for wildcard characters.
* Wildcard characters are '*' and '?'.
* @return True if the string contains wildcard characters, false otherwise.
*/
public static boolean isWild(String wildPattern) {
return wildPattern.contains("*") || wildPattern.contains("?");
}
/**
* Checks if the input string matches the given wildcard pattern. Both strings are considered case insensitive.
*
* @param inputString The input string to be matched.
* @param wildPattern The wildcard pattern used for matching.
* '?' matches any single character.
* '*' matches any sequence of characters (including the empty sequence).
* @return True if the string matches the wildcard pattern, false otherwise.
*/
public static boolean isWildMatch(String inputString, String wildPattern) {
if (wildPattern.isEmpty())
return inputString.isEmpty();
if (inputString.isEmpty())
return wildPattern.equals("*");
inputString = inputString.toLowerCase();
wildPattern = wildPattern.toLowerCase();
if (wildPattern.charAt(0) == '?') {
return isWildMatch(inputString.substring(1), wildPattern.substring(1));
} else if (wildPattern.charAt(0) == '*') {
return isWildMatch(inputString, wildPattern.substring(1)) || isWildMatch(inputString.substring(1), wildPattern);
} else {
return (inputString.charAt(0) == wildPattern.charAt(0)) && isWildMatch(inputString.substring(1), wildPattern.substring(1));
}
}
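// A few illustrative matches for isWildMatch() above (case-insensitive, recursive):
//   isWildMatch("llama-2-7b.Q4_K_M.gguf", "*q4*") -> true   ('*' spans any run of characters)
//   isWildMatch("phi-2.Q4_0.gguf", "tiny*")       -> false  (the literal prefix does not match)
//   isWildMatch("abc", "a?c")                     -> true   ('?' matches exactly one character)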
/**
* This method is used to get the .jllm directory AND set up all the config in it.
* If the directory exists, it reads the configuration from the existing .jllm directory.
* If it does not, it initializes the .jllm directory and seeds its properties with default values.
*
* @return The location of .jllm as a String.
*/
public static String getJllmDir() {
if (new File(JLLM_DIR).isDirectory())
return readJllmDir(JLLM_DIR); // the .jllm dir already exists. Read config from there
else
return createJllmDir(JLLM_DIR); // the .jllm dir does not exist. Initialize it with default values
}
/**
* Locate & reads both jllm.properties & model.properties files in the specified .jllm directory.
* If either one is missing it will create them using default values
*
* @param dirName The name of the directory to read the property files from.
* @return The name of the directory.
*/
public static String readJllmDir(String dirName) {
String propFile = dirName + File.separator + "jllm.properties";
if (!new File(propFile).exists()) createProps(propFile);
readProps(propFile);
String modelFile = dirName + File.separator + "models.properties";
if (!new File(modelFile).exists()) createModelProps(modelFile);
readModelProps(dirName + File.separator + "models.properties");
return dirName;
}
/**
* Create the .jllm dir and initialize it by creating both property files populated with default values
*
* @param dirName The name of the directory to be created.
* @return The name of the created directory.
*/
public static String createJllmDir(String dirName) {
createDir(dirName);
createProps(dirName + File.separator + "jllm.properties");
createModelProps(dirName + File.separator + "models.properties");
return dirName;
}
/**
* Return the executable "llamafile" binary
* On windows make sure that the binary is "llamafile.exe" instead of "llamafile"
*
* @return The name of the executable llamafile binary.
*/
public static String getLlamaExec() {
if (IS_WINDOWS && !LLAMA_EXEC.endsWith(".exe"))
return LLAMA_EXEC + ".exe";
return LLAMA_EXEC;
}
/**
* Read the jllm properties file. Note that all variables read from properties are in UPPER CASE.
*
* @param propFile the name of the jllm properties file
*/
public static void readProps(String propFile) {
Properties jllmProps = new Properties();
try (FileInputStream propStream = new FileInputStream(propFile)) {
jllmProps.load(propStream);
DEFAULT_PORT = Integer.parseInt(jllmProps.getProperty("DEFAULT_PORT"));
BUFFER_SIZE = Integer.parseInt(jllmProps.getProperty("BUFFER_SIZE"));
DEFAULT_QUANT = jllmProps.getProperty("DEFAULT_QUANT");
DEFAULT_FILE_EXT = jllmProps.getProperty("DEFAULT_FILE_EXT");
LLAMA_EXEC = jllmProps.getProperty("LLAMA_EXEC");
LLAMA_PARAMS = jllmProps.getProperty("LLAMA_PARAMS");
} catch (IOException ex) {
ex.printStackTrace();
}
}
/**
* Read the model properties file
*
* @param propFile the name of the model properties file
*/
public static void readModelProps(String propFile) {
SortedMap<String, String> resultMap = new TreeMap<>();
try (BufferedReader modelReader = new BufferedReader(new FileReader(propFile))) {
String currLine;
while ((currLine = modelReader.readLine()) != null) {
if (currLine.trim().startsWith("#"))
continue;
String[] lineParts = currLine.split("=", 2);
if (lineParts.length >= 2)
resultMap.put(lineParts[0].trim(), lineParts[1].trim());
else
System.out.println("ERROR: Illegal line in Model file: " + currLine);
}
MODEL_MAP = resultMap;
} catch (IOException e) {
e.printStackTrace();
}
}
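// The models.properties file read above is plain key = value lines; a hypothetical entry
// (the short name to the right of '=' is an assumption, the path format matches the code above):
//   # lines starting with '#' are ignored
//   tinyllama = TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf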
/**
* Create a jllm properties file with default values for all settings
*
* @param propFile the name of the jllm properties file
*/
public static void createProps(String propFile) {
Properties jllmProps = new Properties();
// set the properties value
jllmProps.setProperty("DEFAULT_PORT", String.valueOf(DEFAULT_PORT));
jllmProps.setProperty("BUFFER_SIZE", String.valueOf(BUFFER_SIZE));
jllmProps.setProperty("DEFAULT_QUANT", DEFAULT_QUANT);
jllmProps.setProperty("DEFAULT_FILE_EXT", DEFAULT_FILE_EXT);
jllmProps.setProperty("LLAMA_EXEC", LLAMA_EXEC);
jllmProps.setProperty("LLAMA_PARAMS", LLAMA_PARAMS);
try (FileOutputStream outStream = new FileOutputStream(propFile)) {
jllmProps.store(outStream, "All jllm properties can be edited here");
System.out.println("Saved properties to: " + propFile);
} catch (IOException ex) {
ex.printStackTrace();
}
}
/**
* Create a model properties file with default values for all settings
*
* @param propFile the name of the model properties file
*/
public static void createModelProps(String propFile) {
try (BufferedWriter propWriter = new BufferedWriter(new FileWriter(propFile))) {
// Iterate over the entries of the map in alphabetical order of keys
for (Map.Entry<String, String> mapEntry : MODEL_MAP.entrySet()) {
propWriter.write(mapEntry.getKey() + " = " + mapEntry.getValue()); // set the properties value
propWriter.newLine();
}
} catch (IOException ex) {
ex.printStackTrace();
}
}
/**
* Print a list of all locally downloaded models (in the .jllm directory)
*
* @throws IOException if an I/O error occurs
*/
static void listModels() throws IOException {
List<String> allModels = listLocalModels(); // List the locally cached models
System.out.printf("Models stored in %s:\n", getJllmDir());
long totalSize = 0;
int modelCount = 0;
for (String currModel : allModels) {
String[] nameParts = currModel.split("/", 3);
String shortName = reverseMapLookup(nameParts);
Path filePath = Paths.get(getJllmDir() + "/" + currModel);
long fileSize = Files.size(filePath);
totalSize += fileSize;
modelCount ++;
System.out.println(shortName + " -> " + currModel + " (" + humanReadableByteCount(fileSize, true) + ")");
}
System.out.println("Total: " + humanReadableByteCount(totalSize, true) + " in " + modelCount + " models");
}
/**
* Converts a byte count to a human-readable string representation.
*
* @param byteCount The number of bytes.
* @param useSIunits Whether to use the SI (International System of Units) units or binary units.
* @return A human-readable string representation of the byte count.
*/
public static String humanReadableByteCount(long byteCount, boolean useSIunits) {
int unit = useSIunits ? 1000 : 1024;
if (byteCount < unit) return byteCount + " B";
int exp = (int) (Math.log(byteCount) / Math.log(unit));
String pre = (useSIunits ? "kMGTPE" : "KMGTPE").charAt(exp-1) + (useSIunits ? "" : "i");
return String.format("%.1f %sB", byteCount / Math.pow(unit, exp), pre);
}
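// Example conversions for humanReadableByteCount() (SI vs binary units):
//   humanReadableByteCount(1_234_567, true)  -> "1.2 MB"
//   humanReadableByteCount(1_234_567, false) -> "1.2 MiB"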
/**
* Performs a reverse lookup in a map based on the value parts
* Inefficient since it loops through the entire map, but the map is small so it performs OK.
*
* @param valueParts An array of strings representing the parts of the value to be looked up in the map.
* @return The key associated with the given value parts, or an empty string if no matching key is found.
*/
public static String reverseMapLookup(String[] valueParts) {
for (Map.Entry<String, String> mapEntry : MODEL_MAP.entrySet()) {
String mapValue = mapEntry.getValue();
if (!mapValue.startsWith(valueParts[0]))
continue;
String[] mapValueParts = mapValue.split("/", 4);
if (mapValueParts.length == 2 && mapValueParts[1].equals(valueParts[1]))
return mapEntry.getKey();
if (mapValueParts.length >= 3 && mapValueParts[1].equals(valueParts[1]) && mapValueParts[2].equals(valueParts[2]))
return mapEntry.getKey();
}
return "";
}
/**
* Return a list of all the locally downloaded models.
*
* @return A List of Strings, where each String is the relative path of a model file from the Jllm directory.
* @throws IOException If an I/O error occurs when opening the directory.
*/
public static List<String> listLocalModels() throws IOException {
String modelDir = getJllmDir();
File jllmDirectory = new File(modelDir);
if (jllmDirectory.isDirectory()) {
return listLocalModels(listAllFiles(modelDir));
}
return new ArrayList<>();
}
/**
* This method lists all the downloaded models with the default file extension (gguf) from a list of all files.
*
* @param allFiles A List of Strings, where each String is the absolute path of a file.
* @return A List of Strings, where each String is the relative path of a model file from the .jllm directory.
*/
public static List<String> listLocalModels(List<String> allFiles) {
List<String> allModels = new ArrayList<>();
for (String currFile : allFiles) {
if (currFile.endsWith("." + DEFAULT_FILE_EXT)) {
String[] modelFromPath = modelFromPath(currFile);
allModels.add(String.join("/", modelFromPath));
}
}
return allModels;
}
/**
* This method lists all the files in the specified directory and its subdirectories.
*
* @param modelDir The directory to search for files.
* @return A List of Strings, where each String is the absolute path of a file in the directory.
* @throws IOException If an I/O error occurs when opening the directory.
*/
public static List<String> listAllFiles(String modelDir) throws IOException {
return Files.walk(Paths.get(modelDir))
.filter(Files::isRegularFile)
.map(Path::toAbsolutePath)
.map(Path::toString)
.collect(Collectors.toList());
}
/**
* Returns the relative path from the base directory to the model file.
*
* @param baseDir The base directory as a string.
* @param modelFile The model file as a string.
* @return The relative path from the base directory to the model file as a string.
*/
public static String getRelativePath(String baseDir, String modelFile) {
// Convert the input strings to Paths
Path basePath = Paths.get(baseDir).normalize().toAbsolutePath();
Path modelPath = Paths.get(modelFile).normalize().toAbsolutePath();
// Check if modelPath starts with basePath
if (!modelPath.startsWith(basePath)) {
throw new IllegalArgumentException("Base dir " + baseDir + " must be a parent directory of model file: " + modelFile);
}
// Return the relative path
return basePath.relativize(modelPath).toString();
}
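// Illustrative call to getRelativePath(), assuming Linux-style paths and a .jllm dir of /home/sk/.jllm
// (path separators are platform specific):
//   getRelativePath("/home/sk/.jllm", "/home/sk/.jllm/TheBloke/phi-2-GGUF/phi-2.Q4_K_M.gguf")
//     -> "TheBloke/phi-2-GGUF/phi-2.Q4_K_M.gguf"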
/**
* This method takes a model file path as input and returns an array of Strings.
* Each String in the array represents a segment of the relative path from the .jllm directory
* to the model file which makes it easy to get a model name.
*
* @param modelFile The absolute path of the model file.
* @return the relative path of the model file from the .jllm dir returned as an array of path segments.
*/
public static String[] modelFromPath(String modelFile) {
return getRelativePath(getJllmDir(), modelFile).split(Pattern.quote(File.separator));
}
/**
* This method constructs a path string from the .jllm directory to the specified model.
*
* @param llmModel The LlmModel for which we need the path.
* @return A String representing the path from the .jllm directory to the model.
*/
public static String pathFromRepo(LlmModel llmModel) {
return Paths.get(getJllmDir(), llmModel.modelOwner, llmModel.modelRepo).toString();
}
/**
* Returns the full absolute path to the model file for the given LlmModel object.
* If the model file does not exist, it returns an empty string.
* It scans all downloaded model files and finds the one matching the required model.
*
* @param llmModel the LlmModel object
* @return the path of the model file if it exists, otherwise an empty string
* @throws IOException if an I/O error occurs
*/
public static String pathFromModel(LlmModel llmModel) throws IOException {
String modelPath = pathFromRepo(llmModel);
if (!new File(modelPath).exists())
return "";
List<String> repoFiles = listAllFiles(modelPath);
return jllm.findModel(repoFiles, llmModel.modelFile);
}
/**
* Dump the list of all known models.
*
* @param isVerbose A boolean flag indicating whether verbose output containing model download URL needs to be printed.
* @param isSizeNeeded A boolean flag indicating whether model file size is needed.
* @param modelWildcard A string containing wildcards to limit the models shown to ones that match
* @throws IOException If an input or output exception occurred.
*/
static void dumpModelMap(boolean isVerbose, boolean isSizeNeeded, String modelWildcard) throws IOException {
int numShown = 0;
for (Map.Entry<String, String> currEntry : MODEL_MAP.entrySet()) {
String currModel = currEntry.getValue();
if (!entryMatchWild(currEntry.getKey(), currEntry.getValue(), modelWildcard))
continue;
LlmModel llmModel = new LlmModel(currModel);
System.out.print(currEntry.getKey() + " -> " + llmModel);
numShown ++;
if (isSizeNeeded) {
String filesUrl = String.format("https://huggingface.co/%s/%s/tree/main", llmModel.modelOwner, llmModel.modelRepo);
if (llmModel.modelExtras != null)
filesUrl += llmModel.modelExtras;
String pageData = urlFetcher(filesUrl, "GET", null, null, "", false);
String lookFor = llmModel.modelFile + "?download=true\">";
int startIndex = pageData.indexOf(lookFor);
int endIndex = pageData.indexOf("<", startIndex);
String scrapeSize = pageData.substring(startIndex + lookFor.length(), endIndex).trim();
System.out.print(" (" + scrapeSize + ")");
lookFor = "mmproj";
startIndex = pageData.lastIndexOf(lookFor);
lookFor = "/resolve/main/";
int startIndex2 = pageData.lastIndexOf(lookFor, startIndex) + lookFor.length();
endIndex = pageData.indexOf("<div", startIndex2);
if (startIndex > 0)
System.out.print(" MMPROJ: " + pageData.substring(startIndex2, endIndex).replace("?download=true\">", " (").replace("\n", ")").trim());
}
if (isVerbose)
System.out.print("\n\tfrom: " + llmModel.modelUrl);
System.out.println();
}
if (numShown == MODEL_MAP.size())
System.out.println(MODEL_MAP.size() + " models available to download");
else
System.out.println("Listed " + numShown + " of total " + MODEL_MAP.size() + " models available to download");
}
/**
* Checks if either string matches a wildcard.
*
* @param string1 the key to check
* @param string2 the value to check
* @param modelWildcard the wildcard pattern to match against
* @return true if the key or value matches the wildcard pattern, or if the wildcard pattern is empty; false otherwise
*/
private static boolean entryMatchWild(String string1, String string2, String modelWildcard) {
return modelWildcard.equals("") || isWildMatch(string1, modelWildcard) || (isWildMatch(string2, modelWildcard));
}
/**
* Download the specified model from HuggingFace.
*
* @param llmModel The LlmModel object to be downloaded.
* @param jllmDir The directory in which downloaded models are stored.
* @return The downloaded model as a File object.
* @throws IOException If an input or output exception occurred.
*/
static File downloadModel(LlmModel llmModel, String jllmDir) throws IOException {
String targetFilename = jllmDir + File.separator + llmModel.modelOwner + File.separator + llmModel.modelRepo + File.separator + llmModel.modelFile;
File targetFile = new File(targetFilename);
if (targetFile.exists() && targetFile.isDirectory()) {
System.out.println("WARNING: The model is already downloaded. Using existing model file");
return targetFile;
}
createDir(getJllmDir() + File.separator + llmModel.modelOwner + File.separator + llmModel.modelRepo);
urlFetcher(llmModel.modelUrl, "GET", null, null, targetFilename, true);
if (llmModel.isMultimodal) {
String multiModalFilename = llmModel.getMultiModalFile();
System.out.println("Pulling multimodal project file " + multiModalFilename + " from " + llmModel.modelOwner + "/" + llmModel.modelRepo);
urlFetcher(llmModel.multiModalUrl, "GET", null, null, multiModalFilename, true);
}
return new File(targetFilename);
}
/**
* Create the directory with the specified path.
*
* @param dirPath The path to be created.
*/
static void createDir(String dirPath) {
Path pathToCreate = Paths.get(dirPath);
if (!Files.exists(pathToCreate)) {
try {
Files.createDirectories(pathToCreate);
System.out.println("Created directory: " + dirPath);
} catch (IOException e) {
System.out.println("Failed to create directory!" + e.getMessage());
}
} else {
System.out.println("Directory already exists " + dirPath);
}
}
/**
* Extract the name of the file from a specified URL
*
* @param urlStr The url containing a file name at the end.
* @return The file name extracted from the end of the URL path.
* @throws MalformedURLException If the url has incorrect format.
*/
public static String extractFilenameFromUrl(String urlStr) throws MalformedURLException {
URL urlObj = URI.create(urlStr).toURL();
return Paths.get(urlObj.getPath()).getFileName().toString();
}
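// Example: query parameters are not part of URL.getPath(), so the "?download=true" suffix is dropped:
//   extractFilenameFromUrl("https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q4_K_M.gguf?download=true")
//     -> "phi-2.Q4_K_M.gguf"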
/**
* Delete the model with the specified name
*
* @param modelName The string containing a model name.
* @throws IOException If an input or output exception occurred.
*/
static void rmModel(String modelName) throws IOException {
LlmModel llmModel = new LlmModel(modelName);
File modelDir = new File(llmModel.getModelDir());
if (modelDir.exists()) {
System.out.printf("Removing model %s -> %s/%s\n", modelName, llmModel.modelOwner, llmModel.modelRepo);
deleteFolder(modelDir);
} else {
System.out.printf("ERROR: Model %s not found\n", modelName);
}
}
/**
* Delete the specified folder recursively. If this results in an empty parent then delete
* the parent folder also.
*
* @param folderToDelete The folder to delete.
*/
public static void deleteFolder(File folderToDelete) {
File[] allFiles = folderToDelete.listFiles();
if (allFiles != null) {
for (File currFile : allFiles) {
if (currFile.isDirectory()) {
deleteFolder(currFile); // Recursive delete for subdirectories
} else {
currFile.delete(); // Delete files
}
}
}
folderToDelete.delete(); // Delete the current folder after deleting its contents
File parentFolder = folderToDelete.getParentFile();
if (isFolderEmpty(parentFolder)) // Delete the parent folder if it is empty
parentFolder.delete();
}
/**
* Checks if a folder is empty.
*
* @param theFolder The folder to check.
* @return true if the folder is empty, false otherwise.
*/
public static boolean isFolderEmpty(File theFolder) {
if (!theFolder.exists() || !theFolder.isDirectory()) {
throw new IllegalArgumentException("Invalid folder path: " + theFolder.getAbsolutePath());
}
String[] fileList = theFolder.list();
return fileList == null || fileList.length == 0;
}
/**
* Generic URL downloader which can use any http method to download to a file or just return a string
*
* @param urlString The url to download.
* @param httpMethod The http method to use.
* @param headerMap The http headers to use.
* @param postData The data to use for POST request
* @param targetFile The file to save or empty string to return downloaded string.
* @param showProgress The flag to determine whether to show download progress or not (useful for large files).
* @return The content length for HEAD requests, the downloaded content as a String when no target file is given, or the HTTP response message otherwise.
* @throws IOException If an input or output exception occurred.
*/
public static String urlFetcher(String urlString, String httpMethod, Map<String, String>headerMap, String postData, String targetFile, boolean showProgress) throws IOException {
if (!targetFile.isEmpty() && new File(targetFile).exists()) {
System.out.println("WARNING: The file " + targetFile + " already exists, so skipping download");
return "OK";
}
HttpURLConnection urlConn = null;
InputStream inStream = null;
OutputStream outStream = null;
try {
URL urlObj = URI.create(urlString).toURL();
urlConn = (HttpURLConnection) urlObj.openConnection();
urlConn.setRequestMethod(httpMethod);
if (headerMap != null) {
for (Map.Entry<String, String> headerEntry : headerMap.entrySet()) {
urlConn.setRequestProperty(headerEntry.getKey(), headerEntry.getValue());
}
}
if (httpMethod.equals("POST") && postData != null) {
// Enable output for POST data
urlConn.setDoOutput(true);
// Write POST data
outStream = urlConn.getOutputStream();
outStream.write(postData.getBytes());
outStream.flush();
outStream.close();
}
urlConn.connect();
int responseCode = urlConn.getResponseCode();
long fileSize = urlConn.getContentLength();
if (responseCode == HttpURLConnection.HTTP_OK) {
inStream = urlConn.getInputStream();
if (targetFile.isEmpty())
outStream = new ByteArrayOutputStream();
else
outStream = new FileOutputStream(targetFile);
byte[] dataBuffer = new byte[BUFFER_SIZE];
long currentSize = 0;
int bytesRead;
char[] spinnerChars = new char[]{'|', '/', '-', '\\'}; // Spinner characters
int loopCounter = 0;
while ((bytesRead = inStream.read(dataBuffer)) != -1) {
outStream.write(dataBuffer, 0, bytesRead);
if (showProgress) {
if (fileSize > 0) {
currentSize += bytesRead;
final int currentProgress = (int) ((((double) currentSize) / ((double) fileSize)) * 100d);
System.out.print("\rDownloaded " + currentProgress + "% of the file");
} else { // If file size was not provided we cannot calculate % downloaded
loopCounter++;
if (loopCounter % 30 == 0)
System.out.print("\r" + spinnerChars[(loopCounter / 30) % 4]); // Print a spinner character, cycling through all four
}
}
}
if (showProgress)
System.out.println();
if (httpMethod.equals("HEAD"))
return String.valueOf(fileSize);
if (targetFile.isEmpty())
return outStream.toString(); // If downloading as string, return the result
else
return urlConn.getResponseMessage();
} else {
throw new IOException("Failed to download data. Response code: " + responseCode + " - " + urlConn.getResponseMessage() + " for URL: " + urlString);
}
} finally {
if (urlConn != null) urlConn.disconnect();
if (inStream != null) inStream.close();
if (outStream != null) outStream.close();
}
}
/**
* Query the GitHub API to get the URL of the latest release of a specified repo
*
* @param repoUrl The repo for which we need the latest release.
* @return The html_url of the latest release.
* @throws IOException If an input or output exception occurred.
*/
@SuppressWarnings("unchecked")
static String getLatestReleaseUrl(String repoUrl) throws IOException {
String[] urlParts = repoUrl.split("/"); // Parse GitHub username and repository from URL
// Construct the URL for GitHub API to fetch release information
String apiUrl = "https://api.github.com/repos/" + urlParts[3] + "/" + urlParts[4] + "/releases/latest";
String responseString = urlFetcher(apiUrl, "GET", null, null, "", false);
PicoJson jsonParser = new PicoJson();
Map<String, String> jsonResult = (Map<String, String>) jsonParser.parseJson(responseString);
return jsonResult.get("html_url");
/*
String paramStart = "\"html_url\":\"";
String paramEnd = "\",\"";
int htmlUrlStart = responseString.indexOf(paramStart);
int htmlUrlEnd = responseString.indexOf(paramEnd, htmlUrlStart);
return responseString.substring(htmlUrlStart + paramStart.length(), htmlUrlEnd);
// Parse JSON response
org.json.JSONObject jsonResponse = new JSONObject(response.toString());
// Extract the URL of the latest release
return jsonResponse.getString("html_url");
*/
}
/**
* Get the download URL for the latest version of llamafile
*
* @return The download URL of the latest llamafile release, or null if it could not be determined.
*/
static String getLatestDownloadUrl() {
try {
String repoUrl = "https://github.com/Mozilla-Ocho/llamafile";
String latestRelease = getLatestReleaseUrl(repoUrl);
System.out.println("URL to latest release: " + latestRelease);
String[] urlParts = latestRelease.split("/");
// The tag is the last part of the URL
String releaseTag = urlParts[urlParts.length - 1];
String finalUrl = String.format("%s/releases/download/%s/llamafile-%s", repoUrl, releaseTag, releaseTag);
System.out.println("URL to latest file: " + finalUrl);
return finalUrl;
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
/**
* Search all the dirs in the PATH for the specified executable
*
* @param commandName The command whose location is needed
* @return The executable File if found in the PATH, otherwise null.
*/
public static File whereIs(String commandName) {
String systemPath = System.getenv("PATH");
String pathSeparator = System.getProperty("path.separator");
StringTokenizer pathTokenizer = new StringTokenizer(systemPath, pathSeparator);
while (pathTokenizer.hasMoreTokens()) {
String pathElement = pathTokenizer.nextToken();
File execFile = new File(pathElement, commandName);
if (execFile.isFile() && execFile.canExecute()) {
return execFile;
}
}
return null; // Executable not found in PATH
}
/**
* For models which don't have a file name specified we compute the name using defaults
*
* @param modelRepo The model repo whose filename is needed
* @return The computed default filename for the model repo.
*/
public static String computeDefaultFilename(String modelRepo) {
String[] fileParts = modelRepo.toLowerCase().split("-");
if (!fileParts[fileParts.length - 1].contains(DEFAULT_FILE_EXT))
throw new RuntimeException("ERROR: The model MUST be of type " + DEFAULT_FILE_EXT + " for model repo: " + modelRepo);
String modelBase = String.join("-", Arrays.copyOf(fileParts, fileParts.length - 1));
return String.join(".", modelBase, DEFAULT_QUANT, DEFAULT_FILE_EXT);
}
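// Illustrative examples, assuming the defaults DEFAULT_QUANT="Q4_K_M" and DEFAULT_FILE_EXT="gguf":
//   computeDefaultFilename("Llama-2-7B-GGUF") -> "llama-2-7b.Q4_K_M.gguf"
//   computeDefaultFilename("sqlcoder")        -> RuntimeException (the repo name does not end in a gguf part)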
/**
* Checks whether the given filename corresponds to an existing file or not.
*
* @param fileName The name of the file to check.
* @return true if the file exists, otherwise returns false.
*/
public static boolean isValidFile(String fileName) {
try {
Path filePath = Paths.get(fileName);
return Files.exists(filePath);
} catch (InvalidPathException | NullPointerException ex) {
return false;
}
}
/**
* Execute an external process in a specific directory. Wait until a specific string appears in the output and
* print every output line if verbosity is needed.
*
* @param procName The name of the external executable process
* @param execDir The dir in which to run the process
* @param lookFor The string to locate in the output before returning
* @param isVerbose If true, then print every line of output
* @return The captured output lines of the process.
* @throws IOException If the process cannot be started, read, or waited on.
*/
static List<String> execProcess(String procName, String execDir, String lookFor, boolean isVerbose) throws IOException {
ProcessBuilder processBuilder = new ProcessBuilder(procName.split(" "));
processBuilder.redirectErrorStream(true);
Map<String, String> processEnv = processBuilder.environment();
processEnv.put("JLLM_DIR", execDir);
processBuilder.directory(new File(execDir));
Process runningProcess = processBuilder.start();
List<String> procOut = new ArrayList<>();
BufferedReader procReader = new BufferedReader(new InputStreamReader(runningProcess.getInputStream()));
String currLine;
while ((currLine = procReader.readLine()) != null) {
procOut.add(currLine);
if (isVerbose)
System.out.println(currLine);
if (lookFor != null && currLine.contains(lookFor))
return procOut;
}
try {
int exitCode = runningProcess.waitFor();
if (exitCode != 0)
procOut.add("ERROR: Unable to execute process: " + procName);
return procOut;
} catch (InterruptedException e) {
throw new IOException("Unable to execute process: " + procName + " due to " + e.getMessage());
}
}
/**
* An LLM model could be running or not. This class represents both cases
*/
static class LlmModel {
String modelOwner;
String modelRepo;
String modelFile;
String multiModalUrl;
String modelUrl;
String modelExtras;
String runningHost;
int runningPort;
long runningPid;
boolean isRunning = false;
boolean isMultimodal = false;
/**
* Constructs a new LlmModel with the specified model name.
*
* @param modelName the name of the model
* @throws MalformedURLException if the model name contains a URL that is not valid
*/
public LlmModel(String modelName) throws MalformedURLException {
String initModelName = modelName;
if (modelName == null || modelName.isEmpty())
throw new RuntimeException("ERROR: No models found with model name: " + modelName);
if (isValidFile(modelName)) {
this.modelOwner = "local-owner";
this.modelRepo = "file";
this.modelUrl = "";
this.modelFile = modelName;
} else if (modelName.startsWith("http://") || modelName.startsWith("https://")) {
this.modelOwner = "local-owner";
this.modelRepo = "url";
this.modelUrl = modelName;
this.modelFile = extractFilenameFromUrl(modelName);
} else {
if (!modelName.contains("/"))
modelName = MODEL_MAP.get(modelName);
if (modelName == null)
throw new RuntimeException("ERROR: No models found with model name: " + initModelName);
if (modelName.contains("?")) {
String[] nameParamsplit = modelName.split("\\?", 2);
modelName = nameParamsplit[0];
this.modelExtras = "?" + nameParamsplit[1];
}
String[] nameParts = modelName.split("/", 4);
this.modelOwner = nameParts[0];
this.modelRepo = nameParts[1];
if (nameParts.length == 2) {
this.modelUrl = computeDownloadUrl(nameParts);
this.modelFile = computeDefaultFilename(nameParts[1]);
} else if (nameParts.length == 3) {
if (isUrl(nameParts[2])) {
this.modelUrl = nameParts[2];
this.modelFile = extractFilenameFromUrl(nameParts[2]);
} else {
this.modelUrl = computeDownloadUrl(nameParts);
this.modelFile = nameParts[2];
}
} else if (nameParts.length == 4) {
isMultimodal = true;
if (isUrl(nameParts[2])) {
this.modelUrl = nameParts[2];
this.modelFile = extractFilenameFromUrl(nameParts[2]);
} else {
this.modelUrl = computeDownloadUrl(nameParts);
this.modelFile = nameParts[2];
}
if (isUrl(nameParts[3])) {
this.multiModalUrl = nameParts[3];
} else {
this.multiModalUrl = computeMmprojDownloadUrl(nameParts);
}
}
}
}
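// A few illustrative parses of the single-argument constructor above (assuming 'tinyllama' is a known short name):
//   "tinyllama"                                       -> looked up in MODEL_MAP and resolved to owner/repo/file
//   "TheBloke/Llama-2-7B-GGUF"                        -> owner/repo; the file name falls back to computeDefaultFilename()
//   "TheBloke/Llama-2-7B-GGUF/llama-2-7b.Q4_K_M.gguf" -> explicit owner/repo/file
//   an "http(s)://..." URL or an existing local path  -> owner "local-owner" with repo "url" or "file" respectively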
/**
* Constructs a new LlmModel from a running process.
*
* @param processString the output of a ps command (platform specific)
* @param isRunning whether the process is currently running or not
*/
public LlmModel(String processString, boolean isRunning) {
// On windows processString looks like:
//C:\Users\sk\.jllm\llamafile.exe --model C:\Users\sk\.jllm\TheBloke\phi-2-GGUF\phi-2.Q4_K_M.gguf --host localhost --port 8080 37072
// On Linux this looks like:
//12874 /home/sk/.jllm/llamafile --model /home/sk/.jllm/TheBloke/TinyLlama-1.1B-1T-OpenOrca-GGUF/tinyllama-1.1b-1t-openorca.Q4_K_M.gguf --host localhost --port 8080
// Notice that proc & pid order are reversed from each other. The getProcPid() call sorts that out
String[] procPid = getProcPid(processString);
// Need to parse out the command-line arguments from proc
this.modelFile = extractParam(procPid[0], "model");
this.runningHost = extractParam(procPid[0], "host");
this.runningPort = Integer.parseInt(extractParam(procPid[0], "port"));
this.runningPid = Long.parseLong(procPid[1]);
try {
String[] modelParts = modelFromPath(modelFile);
this.modelOwner = modelParts[0];
this.modelRepo = modelParts[1];
} catch (IllegalArgumentException e) {
this.modelOwner = "local-owner";
if (isValidFile(modelFile)) this.modelRepo = modelFile;
}
this.isRunning = isRunning;
}
/**
* Parse the process string and extract the process name & the pid into a string array.
*
* @param processString the output of a ps command (platform specific)
* @return an array of 2 strings - process name followed by process pid
*/
static String[] getProcPid(String processString) {
if (startDigit(processString)) {
int spaceIndex = processString.indexOf(' '); // Split the line using the space index
if (spaceIndex != -1) {
String procPid = processString.substring(0, spaceIndex).trim();
String procName = processString.substring(spaceIndex + 1).trim();
return new String[]{procName, procPid};
}
} else {
int spaceIndex = processString.lastIndexOf(' '); // Split the line using the last space index
if (spaceIndex != -1) {
String procName = processString.substring(0, spaceIndex).trim();
String procPid = processString.substring(spaceIndex + 1).trim();
return new String[]{procName, procPid};
}
}
return new String[]{};
}
/**
* Extracts the value of a specified parameter from a command line string.
*
* @param cmdLine the command line string
* @param paramName the name of the parameter to extract
* @return the value of the parameter if it exists, an empty string otherwise
*/
private String extractParam(String cmdLine, String paramName) {
int paramPos = cmdLine.indexOf("--" + paramName);
int paramValStart = cmdLine.indexOf(" ", paramPos);
if (paramValStart == -1) return "";
int paramValEnd = cmdLine.indexOf(" ", paramValStart + 1);
if (paramValEnd == -1) paramValEnd = cmdLine.length();
return cmdLine.substring(paramValStart, paramValEnd).trim();
}
/**
* Checks if the string starts with a digit
*
* @param inString the string to be checked
* @return true if the first character of the string is a digit, false otherwise
*/
static boolean startDigit(String inString) {
return !inString.isEmpty() && Character.isDigit(inString.charAt(0));
}
/**
* Checks if the string contains a URL
*
* @param inString the string to be checked
* @return true if the string is a URL, false otherwise
*/
static boolean isUrl(String inString) {
return inString.startsWith("http://") || inString.startsWith("https://");
}
/**
* Create the String for the download URL for a specified model with the name parts specified.
*
* @param nameParts The array of name parts.
* @return the download url for the model
*/
static String computeDownloadUrl(String[] nameParts) {
String urlTemplate = "https://huggingface.co/%s/%s/resolve/main/%s?download=true";
if (nameParts.length == 2)
return String.format(urlTemplate, nameParts[0], nameParts[1], computeDefaultFilename(nameParts[1]));
else if (nameParts.length >= 3)
return String.format(urlTemplate, nameParts[0], nameParts[1], nameParts[2]);
else
return "ERROR: Cannot compute download URL from: " + nameParts;
}
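// Example, using the name parts from the help text earlier in this file:
//   computeDownloadUrl(new String[]{"TheBloke", "Llama-2-7B-GGUF", "llama-2-7b.Q4_K_M.gguf"})
//     -> "https://huggingface.co/TheBloke/Llama-2-7B-GGUF/resolve/main/llama-2-7b.Q4_K_M.gguf?download=true"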
/**
* Create the String for the download URL for the multimodal project in a specified model with the name parts specified.
*
* @param nameParts The array of name parts.
* @return the download url for the mmproj file of the model
*/
static String computeMmprojDownloadUrl(String[] nameParts) {
String urlTemplate = "https://huggingface.co/%s/%s/resolve/main/%s?download=true";
if (nameParts.length == 4)
return String.format(urlTemplate, nameParts[0], nameParts[1], nameParts[3]);
else
return "ERROR: Cannot compute download URL from: " + nameParts;
}
@Override
public String toString() {
return modelOwner + "/" + modelRepo + "/" + modelFile;
}
public String getModelDir() {
return getJllmDir() + File.separator + modelOwner + File.separator + modelRepo;
}
/**
* Create the String for path to the downloaded multimodal project file for the model.
*
* @return the path to the mmproj file of the model
*/
public String getMultiModalFile() {
try {
if (! isMultimodal) return "";
String multiModalProject = extractFilenameFromUrl(multiModalUrl);
return getJllmDir() + File.separator + modelOwner + File.separator + modelRepo + File.separator + multiModalProject;
} catch (MalformedURLException e) {
return "";
}
}
/**
* Get the short name for the model.
*
* @return the short name of the model
*/
public String getShortName() {
try {
String[] modelParts = modelFromPath(modelFile);
return reverseMapLookup(modelParts);
} catch (IllegalArgumentException e) {
return "";
}
}
}
static class ModelLauncher {
/**
* Launches a model with the specified parameters.
*
* @param modelFile The file path of the model to be launched. It must not be null.
* @param multiModalFile The file path of the multimodal project file to be launched. It must not be null but can be empty.
* @param listenHost The host on which the model should listen. It must not be null.
* @param listenPort The port on which the model should listen.
* @param isVerbose If true, the model will run in verbose mode, providing more detailed output.
* @throws IOException If an I/O error occurs during the operation.
*/
static void launchModel(String modelFile, String multiModalFile, String listenHost, int listenPort, boolean isVerbose) throws IOException {
File serverFile = getLlamaServer();
if (serverFile == null || !serverFile.exists()) { // The llamafile binary does not exist.
downloadLlamafile();
}
serverFile = new File(new File(getJllmDir()), getLlamaExec());
runLlamaServer(serverFile.getAbsolutePath(), modelFile, multiModalFile, listenHost, listenPort, isVerbose);
}
/**
* Locate the llamafile binary. First look in location specified in config file.
* If missing then look in $HOME/.jllm directory. If still missing then look in $PATH
*/
static File getLlamaServer() {
File serverFile = new File(LLAMA_EXEC);
if (serverFile.exists()) // The llamafile binary exists in config location
return serverFile;
serverFile = new File(new File(getJllmDir()), getLlamaExec());
if (serverFile.exists()) // The llamafile binary exists in the jllm dir
return serverFile;
serverFile = whereIs(getLlamaExec());
if (serverFile != null && serverFile.exists()) // The llamafile binary exists in $PATH
return serverFile;
return null;
}
/**
* Downloads the latest Llamafile.
*
* @throws IOException If an I/O error occurs during the download.
*/
static void downloadLlamafile() throws IOException {
String downloadUrl = getLatestDownloadUrl();
String targetFilename = getJllmDir() + File.separator + LLAMA_EXEC;
if (IS_WINDOWS) // On Windows the filename needs a ".exe" at the end
targetFilename += ".exe";
urlFetcher(downloadUrl, "GET", null, null, targetFilename, true);
File downloadedFile = new File(targetFilename);
if (!downloadedFile.exists()) {
System.out.println("ERROR: Could not download server binary from " + downloadUrl);
System.exit(-1);
}
if (!IS_WINDOWS) // On Mac/Linux the file needs a "chmod +x"
downloadedFile.setExecutable(true);
}
}
/**
* Runs the Llama server with the specified parameters.
*
* @param llamaServer The file path of the Llama server. It must not be null.
* @param modelFile The file path of the model to be used by the Llama server. It must not be null.
* @param multiModalFile The file path of the multimodal project file to be launched. It must not be null but can be empty.
* @param listenHost The host on which the Llama server should listen. It must not be null.
* @param listenPort The port on which the Llama server should listen.
* @param isVerbose If true, the Llama server will run in verbose mode, providing more detailed output.
*/
static void runLlamaServer(String llamaServer, String modelFile, String multiModalFile, String listenHost, int listenPort, boolean isVerbose) {
try {
String fullCommand = llamaServer + " " + String.format(LLAMA_PARAMS, modelFile, listenHost, listenPort);
if (! multiModalFile.equals(""))
fullCommand += (" --mmproj " + multiModalFile);
List<String> procResp = execProcess(fullCommand, getJllmDir(), "all slots are idle and system prompt is empty", isVerbose);
if (procResp.get(procResp.size() - 1).startsWith("ERROR")) {
for (String currLine : procResp)
System.out.println(currLine);
System.exit(-1);
}
} catch (IOException e) {
System.out.println("Error starting LLM server, run with --verbose for more details");
System.exit(-1);
}
}
/**
* The Java Process API (via ProcessHandle.allProcesses() & then getting ProcessHandle.Info) does NOT return
* the command line arguments of a process. It was ALWAYS null, so I needed to make this painful hack!
*/
static class ProcessLister {
static String winCmd = "wmic process where \"commandline like '%[l]lamafile.exe%'\" get processid, commandline";
static String linCmd = "ps -eo pid,args";
/**
* Lists all running server processes with the specified process name.
*
* @param processName The name of the process. It must not be null.
* @return A list of processes with the specified process name.
*/
static List<LlmModel> listProcessesByName(String processName) {
List<LlmModel> runningModels = new ArrayList<>();
try {
// Check the operating system
String psCmd = linCmd;
if (IS_WINDOWS) psCmd = winCmd;
List<String> procResp = execProcess(psCmd, getJllmDir(), null, false);
for (String currLine : procResp) {
if (currLine.contains(processName) && currLine.contains("--model") && currLine.contains("--port")) {
currLine = currLine.trim();
runningModels.add(new LlmModel(currLine, true));
}
}
} catch (IOException e) {
e.printStackTrace();
}
return runningModels;
}
}
/**
* This is a very basic Java implementation of a JSON parser, specifically a tokenizer and a recursive descent parser.
* It is designed to parse a JSON string into a Java object, such as a Map or List.
*/
public static class PicoJson {
private char[] jsonChars;
private int indexPointer;
/**
* Parses a JSON string into a Java object.
*
* @param jsonString the JSON string to parse
* @return the parsed Java object
* @throws IllegalArgumentException if the input string is null
* @throws RuntimeException if the input string is not valid JSON
*/
public Object parseJson(String jsonString) {
if (jsonString == null) throw new IllegalArgumentException("Cannot parse a null string");
this.jsonChars = jsonString.toCharArray();
this.indexPointer = 0;
return parseValue();
}
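// Minimal usage sketch of PicoJson, mirroring how getLatestReleaseUrl() consumes it (the JSON snippet
// and release tag below are hypothetical):
//   Object parsed = new PicoJson().parseJson("{\"html_url\": \"https://github.com/Mozilla-Ocho/llamafile/releases/tag/0.1\"}");
//   String htmlUrl = (String) ((Map<String, Object>) parsed).get("html_url"); // -> the release URL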
/**
* Parses a JSON value (object, array, string, number, boolean, or null).
*
* @return the parsed JSON value
* @throws RuntimeException if the input string is not valid JSON
*/
private Object parseValue() {
skipWhitespace();
char c = peekChar();
if (c == '{') return parseObject();
else if (c == '[') return parseArray();
else if (c == '\"') return parseString();
else if (c == 't' || c == 'f') return parseBoolean();
else if (c == 'n') return parseNull();
else if (c == '-' || Character.isDigit(c)) return parseNumber();
else throw new RuntimeException(getExceptionMessage("valid JSON value"));
}
/**
* Parses a JSON object.
*
* @return the parsed JSON object
* @throws RuntimeException if the input string is not valid JSON
*/
private Map<String, Object> parseObject() {
Map<String, Object> hashMap = new HashMap<>();
readChar('{');
while (peekChar() != '}') {
skipWhitespace();
String mapKey = parseString();
skipWhitespace();
readChar(':');
Object mapValue = parseValue();
hashMap.put(mapKey, mapValue);
skipWhitespace();
if (peekChar() == ',' && jsonChars[indexPointer + 1] != '}')
readChar(',');
}
readChar('}');
return hashMap;
}
/**
* Parses a JSON array.
*
* @return the parsed JSON array
* @throws RuntimeException if the input string is not valid JSON
*/
private List<Object> parseArray() {
List<Object> arrayList = new ArrayList<>();
readChar('[');
while (peekChar() != ']') {
skipWhitespace();
arrayList.add(parseValue());
skipWhitespace();
if (peekChar() == ',' && jsonChars[indexPointer + 1] != ']')
readChar(',');
}
readChar(']');
return arrayList;
}
/**
* Parses a JSON string.
*
* @return the parsed JSON string
* @throws RuntimeException if the input string is not valid JSON
*/
private String parseString() {
StringBuilder stringBuilder = new StringBuilder();
readChar('\"');
while (true) {
char prevChar = peekPrevChar();
char current = readChar();
if (current == '\"' && prevChar != '\\') {
break;
}
stringBuilder.append(current);
}
return stringBuilder.toString();
}
/**
* Parses a JSON boolean value from the input string.
*
* @return the parsed JSON boolean value
*/
private Boolean parseBoolean() {
if (tryRead("true")) return true;
if (tryRead("false")) return false;
throw new RuntimeException(getExceptionMessage("'true' or 'false'"));
}
/**
* Parses a null value from the JSON input.
*
* @return null if the input is "null", otherwise throws a RuntimeException
*/
private Object parseNull() {
if (tryRead("null"))
return null;
throw new RuntimeException(getExceptionMessage("null"));
}
/**
* Skips over whitespace characters in the JSON input.
*/
private void skipWhitespace() {
while (Character.isWhitespace(peekChar()))
readChar();
}
/**
* Parses a number value from the JSON input.
*
* @return the parsed number value
*/
private Number parseNumber() {
int startPointer = indexPointer;
while (Character.isDigit(peekChar()) || peekChar() == '-' || peekChar() == '.') readChar();
String numberString = new String(jsonChars, startPointer, indexPointer - startPointer);
if (numberString.contains(".")) {
return Double.parseDouble(numberString);
} else {
try {
return Integer.parseInt(numberString);
} catch (NumberFormatException e) {
try {
return Long.parseLong(numberString);
} catch (NumberFormatException e2) {
throw new RuntimeException(getExceptionMessage(numberString));
}
}
}
}
/**
* Reads the next character from the JSON input.
*
* @return the next character
*/
private char readChar() {
return jsonChars[indexPointer++];
}
/**
* Peeks at the previous character in the JSON input.
*
* @return the previous character
*/
private char peekPrevChar() {
return indexPointer > 0 ? jsonChars[indexPointer - 1] : '\0';
}
/**
* Gets the current line of JSON input for error reporting.
*
* @return the current line of JSON input
*/
private String getErrorJsonLine() {
int startPointer = indexPointer;
while (startPointer > 0 && jsonChars[startPointer] != '\n') {
startPointer--;
}
if (jsonChars[startPointer] == '\n')
startPointer++;
int endPointer = indexPointer;
while (endPointer < jsonChars.length && jsonChars[endPointer] != '\n') {
endPointer++;
}
return new String(jsonChars, startPointer, endPointer - startPointer);
}
/**
* Gets an error message for a parsing exception.
*
* @param expected the expected value or character
* @return the error message
*/
private String getExceptionMessage(String expected) {
return "Expected '" + expected + "' at position " + indexPointer + " with character '" + jsonChars[indexPointer] + "' on line " + getErrorJsonLine();
}
/**
* Reads a specific character from the JSON input.
*
* @param currChar the character to read
*/
private void readChar(char currChar) {
if (peekChar() != currChar)
throw new RuntimeException(getExceptionMessage(String.valueOf(currChar)));
readChar();
}
/**
* Peeks at the next character in the JSON input.
*
* @return the next character
*/
private char peekChar() {
return indexPointer < jsonChars.length ? jsonChars[indexPointer] : '\0';
}
/**
* Tries to read a string from the JSON input.
*
* @param readString the string to read
* @return true if the string is read successfully, false otherwise
*/
private boolean tryRead(String readString) {
for (int i = 0; i < readString.length(); i++) {
if (jsonChars[indexPointer + i] != readString.charAt(i))
return false;
}
indexPointer += readString.length();
return true;
}
}
}