Skip to content

Instantly share code, notes, and snippets.

@damithsj
Created April 16, 2024 10:20
Show Gist options
  • Save damithsj/abe20dda077c937a13ae0839748da7d0 to your computer and use it in GitHub Desktop.
Save damithsj/abe20dda077c937a13ae0839748da7d0 to your computer and use it in GitHub Desktop.
Azure Function for Azure Speech Services Speech-to-Text That Accepts Any Audio Format
using Azure.Core;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.Functions.Worker;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json.Linq;
using System.Diagnostics;
namespace azure_ai_services
{
/// <summary>
/// HTTP-triggered Azure Function that accepts audio in any format ffmpeg can decode,
/// converts it to WAV and transcribes it with Azure Speech Services.
/// </summary>
public class CognitiveServices
{
    // Azure path COMMENT FOR LOCAL TESTING
    // Local path example:
    // D:\_work\dsj23\repos\azure-ai-services\azure-ai-services\executables\ffmpeg.exe
    private const string FfmpegPath = @"C:\home\site\wwwroot\executables\ffmpeg.exe";

    // Credentials are taken from the environment (app settings) when present so that no
    // secret has to live in source control; the placeholders keep the original behaviour
    // when the variables are not set.
    private static readonly string speechKey =
        Environment.GetEnvironmentVariable("SPEECH_KEY") ?? "YOUR_SPEECH_KEY";
    private static readonly string speechRegion =
        Environment.GetEnvironmentVariable("SPEECH_REGION") ?? "your_speech_region";

    private readonly ILogger<CognitiveServices> _logger;

    public CognitiveServices(ILogger<CognitiveServices> logger)
    {
        _logger = logger;
    }

    //--------------------------------------------------------------------------
    //------ Speech to text ----------------------------------------------------
    //--------------------------------------------------------------------------

    /// <summary>
    /// Transcribes the audio posted in the request body.
    /// </summary>
    /// <param name="req">HTTP POST request whose body is the raw audio file.</param>
    /// <returns>
    /// 200 with a JSON payload (<c>DisplayText</c>, <c>Duration</c>) on success, or
    /// 400 when ffmpeg cannot convert the uploaded audio to WAV.
    /// </returns>
    /// <remarks>
    /// Uses single-shot recognition (<c>RecognizeOnceAsync</c>), so only one
    /// recognition result is returned per request.
    /// </remarks>
    [Function("Stt")]
    public async Task<IActionResult> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post")] HttpRequest req)
    {
        _logger.LogInformation("C# HTTP trigger function processed a request.");

        var tempPath = Path.GetTempPath();
        // For FFMPeg input file name can be anything
        var tempIn = Path.Combine(tempPath, Path.GetRandomFileName() + ".tmp");
        var tempOut = Path.Combine(tempPath, Path.GetRandomFileName() + ".wav");

        try
        {
            // Stream the body straight to disk instead of buffering it all in memory first.
            _logger.LogInformation($"File write start: {tempIn}");
            await using (var input = File.Create(tempIn))
            {
                await req.Body.CopyToAsync(input);
            }
            _logger.LogInformation($"File write finished: {tempIn}");

            var (exitCode, ffmpegLog) = await RunFfmpegAsync(tempIn, tempOut);
            if (exitCode != 0)
            {
                _logger.LogError($"FFMPEG failed with exit code {exitCode}: {ffmpegLog}");
                return new BadRequestObjectResult("Unable to convert the uploaded audio to WAV.");
            }
            _logger.LogInformation($"File conversion finished: {tempOut}");

            //Now comes the interesting part. SPEECH TO TEXT
            using var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
            speechConfig.SpeechRecognitionLanguage = "en-US";

            string displayText;
            TimeSpan duration;
            // Dispose the recognizer and audio config before the finally block runs so the
            // WAV file handle is released prior to deletion.
            using (var audioConfig = AudioConfig.FromWavFileInput(tempOut))
            using (var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig))
            using (var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync())
            {
                if (speechRecognitionResult.Reason != ResultReason.RecognizedSpeech)
                {
                    _logger.LogWarning($"STT finished without recognized speech: {speechRecognitionResult.Reason}");
                }
                displayText = speechRecognitionResult.Text;
                duration = speechRecognitionResult.Duration;
            }
            _logger.LogInformation($"STT Result: {displayText}");

            // Create response payload
            JObject response = new JObject
            {
                { "DisplayText", displayText },
                { "Duration", duration },
            };

            // Send the response as JSON (same body as before, now with the proper content type).
            return new ContentResult
            {
                Content = response.ToString(),
                ContentType = "application/json",
                StatusCode = StatusCodes.Status200OK,
            };
        }
        finally
        {
            // Delete the temp files on every path, including failures.
            TryDelete(tempOut);
            TryDelete(tempIn);
        }
    }

    /// <summary>
    /// Runs ffmpeg to convert <paramref name="inputPath"/> into <paramref name="outputPath"/>.
    /// </summary>
    /// <returns>The process exit code together with everything ffmpeg wrote to stderr.</returns>
    private async Task<(int ExitCode, string Log)> RunFfmpegAsync(string inputPath, string outputPath)
    {
        using var process = new Process();
        process.StartInfo.FileName = FfmpegPath;
        process.StartInfo.Arguments = $"-i \"{inputPath}\" \"{outputPath}\"";
        process.StartInfo.RedirectStandardOutput = true;
        process.StartInfo.RedirectStandardError = true;
        process.StartInfo.UseShellExecute = false;
        _logger.LogInformation($"Args: {process.StartInfo.Arguments}");

        process.Start();

        // Both redirected pipes must be drained while the process runs: waiting for exit
        // first can deadlock once ffmpeg fills the pipe buffer with its (verbose) log.
        var stdoutTask = process.StandardOutput.ReadToEndAsync();
        var stderrTask = process.StandardError.ReadToEndAsync();
        await Task.WhenAll(stdoutTask, stderrTask, process.WaitForExitAsync());

        return (process.ExitCode, await stderrTask);
    }

    /// <summary>
    /// Best-effort delete: a cleanup failure is logged but never masks the real outcome.
    /// </summary>
    private void TryDelete(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
        {
            _logger.LogWarning(ex, $"Could not delete temp file: {path}");
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment