Skip to content

Instantly share code, notes, and snippets.

@Green-Sky
Created June 30, 2023 15:41
Show Gist options
  • Save Green-Sky/aae372314f981d14b00852c34985c93a to your computer and use it in GitHub Desktop.
Save Green-Sky/aae372314f981d14b00852c34985c93a to your computer and use it in GitHub Desktop.
#include "llama-util.h"
#include "llama.h"
int main(void) {
//llama_file_saver lfs{"ggml-bytewise-vocab.bin", nullptr, LLAMA_FTYPE_ALL_F32};
llama_file file{"ggml-bytewise-vocab.bin", "wb"};
// magic
file.write_u32(LLAMA_FILE_MAGIC); // magic
file.write_u32(LLAMA_FILE_VERSION); // version
uint32_t n_vocab = 0x00ff + 3 + 1;
// hparams
file.write_u32(n_vocab);
file.write_u32(1/*n_embd*/);
file.write_u32(1/*n_mult*/);
file.write_u32(1/*n_head*/);
file.write_u32(1/*n_layer*/);
file.write_u32(1/*n_rot*/);
file.write_u32(0/*new_ftype*/);
// vocab
const float zero = 0.f;
// unk?
file.write_u32(0); // TODO: llama tok uses space-doublequestionmark-space
//file.write_raw(token_score.tok.data(), token_score.tok.size());
file.write_raw(&zero, sizeof(zero));
// bos
file.write_u32(0);
//file.write_raw(token_score.tok.data(), token_score.tok.size());
file.write_raw(&zero, sizeof(zero));
// eos
file.write_u32(0);
//file.write_raw(token_score.tok.data(), token_score.tok.size());
file.write_raw(&zero, sizeof(zero));
for (uint16_t i = 0; i <= 0x00ff; i++) {
file.write_u32(1);
uint8_t tmp_i = i;
file.write_raw(&tmp_i, 1);
file.write_raw(&zero, sizeof(zero));
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment