@fanweixiao
Created February 7, 2024 09:08
AWS Graviton3 32vCPUs
$ ./main -m models/ggml-base-q5_1.bin -f ~/zh.wav
whisper_init_from_file_with_params_no_state: loading model from 'models/ggml-base-q5_1.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51865
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 512
whisper_model_load: n_text_head = 8
whisper_model_load: n_text_layer = 6
whisper_model_load: n_mels = 80
whisper_model_load: ftype = 9
whisper_model_load: qntvr = 1
whisper_model_load: type = 2 (base)
whisper_model_load: adding 1608 extra tokens
whisper_model_load: n_langs = 99
whisper_model_load: CPU total size = 59.22 MB (1 buffers)
whisper_model_load: model size = 59.12 MB
whisper_init_state: kv self size = 16.52 MB
whisper_init_state: kv cross size = 18.43 MB
whisper_init_state: compute buffer (conv) = 16.17 MB
whisper_init_state: compute buffer (encode) = 94.42 MB
whisper_init_state: compute buffer (cross) = 5.08 MB
whisper_init_state: compute buffer (decode) = 105.96 MB
system_info: n_threads = 4 / 32 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | METAL = 0 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0 |
main: processing '/home/ubuntu/zh.wav' (79949 samples, 5.0 sec), 4 threads, 1 processors, 5 beams + best of 5, lang = en, task = transcribe, timestamps = 1 ...
[00:00:00.000 --> 00:00:04.480] I think running is the most important thing for me to see health.
whisper_print_timings: load time = 53.78 ms
whisper_print_timings: fallbacks = 0 p / 0 h
whisper_print_timings: mel time = 6.41 ms
whisper_print_timings: sample time = 27.94 ms / 82 runs ( 0.34 ms per run)
whisper_print_timings: encode time = 1310.36 ms / 1 runs ( 1310.36 ms per run)
whisper_print_timings: decode time = 4.54 ms / 2 runs ( 2.27 ms per run)
whisper_print_timings: batchd time = 132.78 ms / 78 runs ( 1.70 ms per run)
whisper_print_timings: prompt time = 0.00 ms / 1 runs ( 0.00 ms per run)
whisper_print_timings: total time = 1539.46 ms
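Note that each run above uses only 4 of the instance's 32 vCPUs (`n_threads = 4 / 32` in the `system_info` line — 4 threads is whisper.cpp's default). The `-t` flag raises the thread count; a sketch of a run using all vCPUs (not measured here, and speedup will not be linear since the encoder is memory-bandwidth-bound on quantized models):

```shell
# Same base-q5_1 run as above, but with all 32 Graviton3 vCPUs.
./main -m models/ggml-base-q5_1.bin -f ~/zh.wav -t 32
```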
$ ./main -m models/ggml-large-v3-q5_0.bin -f ~/zh.wav
whisper_init_from_file_with_params_no_state: loading model from 'models/ggml-large-v3-q5_0.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51866
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 1280
whisper_model_load: n_audio_head = 20
whisper_model_load: n_audio_layer = 32
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 1280
whisper_model_load: n_text_head = 20
whisper_model_load: n_text_layer = 32
whisper_model_load: n_mels = 128
whisper_model_load: ftype = 8
whisper_model_load: qntvr = 2
whisper_model_load: type = 5 (large v3)
whisper_model_load: adding 1609 extra tokens
whisper_model_load: n_langs = 100
whisper_model_load: CPU total size = 1080.97 MB (2 buffers)
whisper_model_load: model size = 1080.47 MB
whisper_init_state: kv self size = 220.20 MB
whisper_init_state: kv cross size = 245.76 MB
whisper_init_state: compute buffer (conv) = 35.50 MB
whisper_init_state: compute buffer (encode) = 233.50 MB
whisper_init_state: compute buffer (cross) = 10.15 MB
whisper_init_state: compute buffer (decode) = 108.99 MB
system_info: n_threads = 4 / 32 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | METAL = 0 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0 |
main: processing '/home/ubuntu/zh.wav' (79949 samples, 5.0 sec), 4 threads, 1 processors, 5 beams + best of 5, lang = en, task = transcribe, timestamps = 1 ...
[00:00:00.000 --> 00:00:05.000] I think running is most important because it brings me health.
whisper_print_timings: load time = 326.38 ms
whisper_print_timings: fallbacks = 0 p / 0 h
whisper_print_timings: mel time = 7.40 ms
whisper_print_timings: sample time = 24.98 ms / 72 runs ( 0.35 ms per run)
whisper_print_timings: encode time = 29965.32 ms / 1 runs (29965.32 ms per run)
whisper_print_timings: decode time = 27.04 ms / 1 runs ( 27.04 ms per run)
whisper_print_timings: batchd time = 1586.20 ms / 69 runs ( 22.99 ms per run)
whisper_print_timings: prompt time = 0.00 ms / 1 runs ( 0.00 ms per run)
whisper_print_timings: total time = 31941.65 ms
$ ./main -m models/ggml-medium-q5_0.bin -f ~/zh.wav
whisper_init_from_file_with_params_no_state: loading model from 'models/ggml-medium-q5_0.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51865
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 1024
whisper_model_load: n_audio_head = 16
whisper_model_load: n_audio_layer = 24
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 1024
whisper_model_load: n_text_head = 16
whisper_model_load: n_text_layer = 24
whisper_model_load: n_mels = 80
whisper_model_load: ftype = 8
whisper_model_load: qntvr = 1
whisper_model_load: type = 4 (medium)
whisper_model_load: adding 1608 extra tokens
whisper_model_load: n_langs = 99
whisper_model_load: CPU total size = 538.97 MB (1 buffers)
whisper_model_load: model size = 538.59 MB
whisper_init_state: kv self size = 132.12 MB
whisper_init_state: kv cross size = 147.46 MB
whisper_init_state: compute buffer (conv) = 28.00 MB
whisper_init_state: compute buffer (encode) = 187.14 MB
whisper_init_state: compute buffer (cross) = 8.46 MB
whisper_init_state: compute buffer (decode) = 107.98 MB
system_info: n_threads = 4 / 32 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | METAL = 0 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0 |
main: processing '/home/ubuntu/zh.wav' (79949 samples, 5.0 sec), 4 threads, 1 processors, 5 beams + best of 5, lang = en, task = transcribe, timestamps = 1 ...
[00:00:00.000 --> 00:00:04.400] I think the most important thing in running is to bring health to me.
whisper_print_timings: load time = 184.65 ms
whisper_print_timings: fallbacks = 0 p / 0 h
whisper_print_timings: mel time = 6.42 ms
whisper_print_timings: sample time = 19.27 ms / 71 runs ( 0.27 ms per run)
whisper_print_timings: encode time = 15536.68 ms / 1 runs (15536.68 ms per run)
whisper_print_timings: decode time = 14.88 ms / 1 runs ( 14.88 ms per run)
whisper_print_timings: batchd time = 847.85 ms / 68 runs ( 12.47 ms per run)
whisper_print_timings: prompt time = 0.00 ms / 1 runs ( 0.00 ms per run)
whisper_print_timings: total time = 16613.97 ms
$ ./main -m models/ggml-small-q5_1.bin -f ~/zh.wav
whisper_init_from_file_with_params_no_state: loading model from 'models/ggml-small-q5_1.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51865
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 768
whisper_model_load: n_text_head = 12
whisper_model_load: n_text_layer = 12
whisper_model_load: n_mels = 80
whisper_model_load: ftype = 9
whisper_model_load: qntvr = 1
whisper_model_load: type = 3 (small)
whisper_model_load: adding 1608 extra tokens
whisper_model_load: n_langs = 99
whisper_model_load: CPU total size = 189.68 MB (1 buffers)
whisper_model_load: model size = 189.49 MB
whisper_init_state: kv self size = 49.55 MB
whisper_init_state: kv cross size = 55.30 MB
whisper_init_state: compute buffer (conv) = 22.08 MB
whisper_init_state: compute buffer (encode) = 140.78 MB
whisper_init_state: compute buffer (cross) = 6.77 MB
whisper_init_state: compute buffer (decode) = 106.97 MB
system_info: n_threads = 4 / 32 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | METAL = 0 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | CUDA = 0 | COREML = 0 | OPENVINO = 0 |
main: processing '/home/ubuntu/zh.wav' (79949 samples, 5.0 sec), 4 threads, 1 processors, 5 beams + best of 5, lang = en, task = transcribe, timestamps = 1 ...
[00:00:00.000 --> 00:00:04.360] I think the most important thing to do is to bring me a healthy body.
whisper_print_timings: load time = 86.54 ms
whisper_print_timings: fallbacks = 0 p / 0 h
whisper_print_timings: mel time = 6.43 ms
whisper_print_timings: sample time = 29.51 ms / 87 runs ( 0.34 ms per run)
whisper_print_timings: encode time = 4970.38 ms / 1 runs ( 4970.38 ms per run)
whisper_print_timings: decode time = 0.00 ms / 1 runs ( 0.00 ms per run)
whisper_print_timings: batchd time = 398.29 ms / 85 runs ( 4.69 ms per run)
whisper_print_timings: prompt time = 0.00 ms / 1 runs ( 0.00 ms per run)
whisper_print_timings: total time = 5495.13 ms
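To compare the four runs side by side, the `whisper_print_timings` lines can be parsed mechanically. A minimal Python sketch, with the encode-time lines copied from the logs above (the 5.0 s clip length comes from the `main: processing` lines):

```python
import re

# "encode time" lines copied verbatim from the four runs above.
logs = {
    "base-q5_1":     "whisper_print_timings:   encode time =  1310.36 ms",
    "small-q5_1":    "whisper_print_timings:   encode time =  4970.38 ms",
    "medium-q5_0":   "whisper_print_timings:   encode time = 15536.68 ms",
    "large-v3-q5_0": "whisper_print_timings:   encode time = 29965.32 ms",
}

AUDIO_SEC = 5.0  # clip length reported by main: "79949 samples, 5.0 sec"
pattern = re.compile(r"encode time\s*=\s*([\d.]+)\s*ms")

# Print encode time and real-time factor (encode seconds per audio second).
for model, line in logs.items():
    ms = float(pattern.search(line).group(1))
    print(f"{model:14s} {ms / 1000:8.2f} s   RTF {ms / 1000 / AUDIO_SEC:5.2f}")
```

At 4 threads, only base-q5_1 encodes faster than real time on this instance; large-v3-q5_0 is roughly 6x slower than real time.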