Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ink-splatters/d0899a4749261b7dfd4818f40ff062b4 to your computer and use it in GitHub Desktop.
llama.cpp choosing BPE pre-tokenizer logic
airstation:llama.cpp ic$ git rev-parse HEAD
952d03dbead16e4dbdd1d3458486340673cc2465
airstation:llama.cpp ic$ echo ; awk '(NR>=4341 && NR<=4382 ){print NR " " $0}' llama.cpp
4341 // for now, only BPE models have pre-tokenizers
4342 if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
4343 if (tokenizer_pre.empty()) {
4344 LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
4345 LLAMA_LOG_WARN("%s: \n", __func__);
4346 LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
4347 LLAMA_LOG_WARN("%s: GENERATION QUALITY WILL BE DEGRADED! \n", __func__);
4348 LLAMA_LOG_WARN("%s: CONSIDER REGENERATING THE MODEL \n", __func__);
4349 LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
4350 LLAMA_LOG_WARN("%s: \n", __func__);
4351 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
4352 } else if (
4353 tokenizer_pre == "default") {
4354 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
4355 } else if (
4356 tokenizer_pre == "llama3" ||
4357 tokenizer_pre == "llama-v3" ||
4358 tokenizer_pre == "llama-bpe") {
4359 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
4360 } else if (
4361 tokenizer_pre == "deepseek-llm") {
4362 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM;
4363 } else if (
4364 tokenizer_pre == "deepseek-coder") {
4365 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER;
4366 } else if (
4367 tokenizer_pre == "falcon") {
4368 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
4369 } else if (
4370 tokenizer_pre == "mpt") {
4371 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
4372 } else if (
4373 tokenizer_pre == "starcoder") {
4374 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_STARCODER;
4375 } else if (
4376 tokenizer_pre == "gpt-2") {
4377 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_GPT2;
4378 } else {
4379 throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
4380 }
4381 } else {
4382 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment