# agricolamz/speech_to_text_with_audio.whisper.R

## speech_to_text_with_audio.whisper.R
#' Transcribe an audio file with whisper and save the result as CSV
#'
#' Converts `audio` with ffmpeg to a 16 kHz, mono, 16-bit PCM WAV file,
#' runs an `audio.whisper` model over it, and writes the `data` element of
#' the prediction to `<output_name>.csv`.
#'
#' @param audio Path to the input audio file; passed to `ffmpeg -i`.
#' @param output_name Output path without extension; ".csv" is appended.
#' @param model_path Model specification passed to `whisper()`.
#' @param language Language code passed to `predict()`. Defaults to "ru",
#'   the value that used to be hard-coded.
#' @return Invisibly, the `timing` element of the prediction, which is also
#'   printed.
speech_to_text <- function(audio,
                           output_name = "output",
                           model_path = "ggml-large-v3.bin",
                           language = "ru") {
  library(tidyverse)
  library(audio.whisper)

  # convert to the format specs ---------------------------------------------
  # A unique temp file (instead of "<tempdir>/<output_name>.wav") avoids
  # ffmpeg's overwrite prompt when the same output_name is reused within one
  # R session, and also works when output_name contains a directory part.
  wav_path <- tempfile(pattern = "speech_to_text_", fileext = ".wav")
  on.exit(unlink(wav_path), add = TRUE)

  # Paths are shell-quoted so spaces or shell metacharacters in file names
  # cannot break (or inject into) the command line.
  status <- system2(
    "ffmpeg",
    args = c(
      "-i", shQuote(audio),
      "-ar", "16000",
      "-ac", "1",
      "-c:a", "pcm_s16le",
      shQuote(wav_path)
    )
  )
  if (status != 0L) {
    stop(
      "ffmpeg failed to convert '", audio, "' (exit status ", status, ")",
      call. = FALSE
    )
  }

  # load and run model ------------------------------------------------------
  model <- whisper(model_path)
  result <- predict(model, wav_path, language = language)

  # write down results ------------------------------------------------------
  result$data |>
    as.data.frame() |>
    write_csv(paste0(output_name, ".csv"))

  print(result$timing)
}

# Example run: transcribe test.wav; the transcript is written to test.csv.
speech_to_text(audio = "test.wav", output_name = "test")
	#' Transcribe an audio file with whisper and save the result as CSV
	#'
	#' Converts `audio` with ffmpeg to a 16 kHz, mono, 16-bit PCM WAV file,
	#' runs an `audio.whisper` model over it, and writes the `data` element of
	#' the prediction to `<output_name>.csv`.
	#'
	#' @param audio Path to the input audio file; passed to `ffmpeg -i`.
	#' @param output_name Output path without extension; ".csv" is appended.
	#' @param model_path Model specification passed to `whisper()`.
	#' @param language Language code passed to `predict()`. Defaults to "ru",
	#'   the value that used to be hard-coded.
	#' @return Invisibly, the `timing` element of the prediction, which is also
	#'   printed.
	speech_to_text <- function(audio,
	                           output_name = "output",
	                           model_path = "ggml-large-v3.bin",
	                           language = "ru") {
	  library(tidyverse)
	  library(audio.whisper)

	  # convert to the format specs ---------------------------------------------
	  # A unique temp file (instead of "<tempdir>/<output_name>.wav") avoids
	  # ffmpeg's overwrite prompt when the same output_name is reused within one
	  # R session, and also works when output_name contains a directory part.
	  wav_path <- tempfile(pattern = "speech_to_text_", fileext = ".wav")
	  on.exit(unlink(wav_path), add = TRUE)

	  # Paths are shell-quoted so spaces or shell metacharacters in file names
	  # cannot break (or inject into) the command line.
	  status <- system2(
	    "ffmpeg",
	    args = c(
	      "-i", shQuote(audio),
	      "-ar", "16000",
	      "-ac", "1",
	      "-c:a", "pcm_s16le",
	      shQuote(wav_path)
	    )
	  )
	  if (status != 0L) {
	    stop(
	      "ffmpeg failed to convert '", audio, "' (exit status ", status, ")",
	      call. = FALSE
	    )
	  }

	  # load and run model ------------------------------------------------------
	  model <- whisper(model_path)
	  result <- predict(model, wav_path, language = language)

	  # write down results ------------------------------------------------------
	  # Plain native pipes: the escaped form `\|>` is not valid R syntax.
	  result$data |>
	    as.data.frame() |>
	    write_csv(paste0(output_name, ".csv"))

	  print(result$timing)
	}

	# Example run: transcribe test.wav; the transcript is written to test.csv.
	speech_to_text(audio = "test.wav", output_name = "test")