damithsj/CognitiveServices.cs

## CognitiveServices.cs
using Azure.Core;
using Microsoft.AspNetCore.Http;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Azure.Functions.Worker;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Microsoft.Extensions.Logging;
using Newtonsoft.Json.Linq;
using System.Diagnostics;


namespace azure_ai_services
{
    /// <summary>
    /// HTTP-triggered Azure Functions that wrap Azure AI Speech operations.
    /// </summary>
    public class CognitiveServices
    {
        // ffmpeg location inside the deployed Function App; used when FFMPEG_PATH is not set.
        private const string DefaultFfmpegPath = @"C:\home\site\wwwroot\executables\ffmpeg.exe";

        // Settings are read from the environment (app settings / local.settings.json) so that
        // no secret has to live in source. The original placeholders remain as fallbacks.
        private static readonly string speechKey = GetSetting("SPEECH_KEY", "YOUR_SPEECH_KEY");
        private static readonly string speechRegion = GetSetting("SPEECH_REGION", "your_speech_region");

        // Set FFMPEG_PATH to point at a local ffmpeg build when testing outside Azure.
        private static readonly string ffmpegPath = GetSetting("FFMPEG_PATH", DefaultFfmpegPath);

        private readonly ILogger<CognitiveServices> _logger;

        public CognitiveServices(ILogger<CognitiveServices> logger)
        {
            _logger = logger;
        }

        //--------------------------------------------------------------------------
        //------ Speech to text ----------------------------------------------------
        //--------------------------------------------------------------------------

        /// <summary>
        /// Transcribes the audio posted in the request body.
        /// The body is saved to a temp file, converted to WAV with ffmpeg and passed to
        /// the Speech SDK for a single-utterance recognition (en-US).
        /// </summary>
        /// <param name="req">HTTP POST request whose body is the raw audio in any format ffmpeg can decode.</param>
        /// <returns>
        /// 200 with a JSON object containing <c>DisplayText</c> and <c>Duration</c>,
        /// or 500 when ffmpeg could not convert the input.
        /// </returns>
        [Function("Stt")]
        public async Task<IActionResult> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post")] HttpRequest req)
        {
            _logger.LogInformation("C# HTTP trigger function processed a request.");

            var tempPath = Path.GetTempPath();
            // For ffmpeg the input file name can be anything; the format is probed from the content.
            var tempIn = Path.Combine(tempPath, Path.GetRandomFileName() + ".tmp");
            var tempOut = Path.Combine(tempPath, Path.GetRandomFileName() + ".wav");

            try
            {
                _logger.LogInformation("File write start: {TempIn}", tempIn);
                // Stream the body straight to disk instead of buffering the whole upload in memory.
                await using (var inputFile = File.Create(tempIn))
                {
                    await req.Body.CopyToAsync(inputFile);
                }
                _logger.LogInformation("File write finished: {TempIn}", tempIn);

                if (!await ConvertToWavAsync(tempIn, tempOut))
                {
                    return new StatusCodeResult(StatusCodes.Status500InternalServerError);
                }

                // Now comes the interesting part. SPEECH TO TEXT
                var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
                speechConfig.SpeechRecognitionLanguage = "en-US";

                JObject response;
                // Explicit scopes: the recognizer is disposed before its audio source, and both
                // release the WAV file before the finally block deletes it.
                using (var audioConfig = AudioConfig.FromWavFileInput(tempOut))
                using (var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig))
                {
                    var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

                    if (speechRecognitionResult.Reason != ResultReason.RecognizedSpeech)
                    {
                        _logger.LogWarning("STT finished without recognized speech: {Reason}", speechRecognitionResult.Reason);
                    }

                    _logger.LogInformation("STT Result: {Text}", speechRecognitionResult.Text);

                    // Create response payload
                    response = new JObject
                    {
                        { "DisplayText", speechRecognitionResult.Text },
                        { "Duration", speechRecognitionResult.Duration },
                    };
                }

                // Send the payload with an explicit JSON content type; returning the string
                // through OkObjectResult would label it as plain text.
                return new ContentResult
                {
                    Content = response.ToString(),
                    ContentType = "application/json",
                    StatusCode = StatusCodes.Status200OK,
                };
            }
            finally
            {
                // Delete the temp files on success, on failure and on exceptions alike.
                TryDelete(tempOut);
                TryDelete(tempIn);
            }
        }

        /// <summary>Runs ffmpeg to convert <paramref name="inputPath"/> into a WAV file at <paramref name="outputPath"/>.</summary>
        /// <returns><c>true</c> when ffmpeg exited with code 0; otherwise <c>false</c> (stderr is logged).</returns>
        private async Task<bool> ConvertToWavAsync(string inputPath, string outputPath)
        {
            using var process = new Process();
            process.StartInfo.FileName = ffmpegPath;
            process.StartInfo.Arguments = $"-i \"{inputPath}\" \"{outputPath}\"";
            process.StartInfo.RedirectStandardOutput = true;
            process.StartInfo.RedirectStandardError = true;
            process.StartInfo.UseShellExecute = false;

            _logger.LogInformation("Args: {Arguments}", process.StartInfo.Arguments);

            process.Start();

            // Drain both redirected pipes while ffmpeg runs. Waiting for exit first can deadlock:
            // ffmpeg logs to stderr and blocks once the pipe buffer is full.
            var stdOutTask = process.StandardOutput.ReadToEndAsync();
            var stdErrTask = process.StandardError.ReadToEndAsync();
            await Task.WhenAll(stdOutTask, stdErrTask);
            await process.WaitForExitAsync();

            if (process.ExitCode != 0)
            {
                _logger.LogError("FFMPEG failed with exit code {ExitCode}: {Error}", process.ExitCode, await stdErrTask);
                return false;
            }

            _logger.LogInformation("File conversion finished: {TempOut}", outputPath);
            return true;
        }

        /// <summary>Best-effort delete of a temp file; a failure is logged and never thrown from cleanup.</summary>
        private void TryDelete(string path)
        {
            try
            {
                // File.Delete does not throw when the file does not exist.
                File.Delete(path);
            }
            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
            {
                _logger.LogWarning(ex, "Could not delete temp file: {Path}", path);
            }
        }

        /// <summary>Returns the environment variable <paramref name="name"/>, or <paramref name="fallback"/> when it is unset or blank.</summary>
        private static string GetSetting(string name, string fallback)
        {
            var value = Environment.GetEnvironmentVariable(name);
            return string.IsNullOrWhiteSpace(value) ? fallback : value;
        }
    }
}
	using Azure.Core;
	using Microsoft.AspNetCore.Http;
	using Microsoft.AspNetCore.Mvc;
	using Microsoft.Azure.Functions.Worker;
	using Microsoft.CognitiveServices.Speech;
	using Microsoft.CognitiveServices.Speech.Audio;
	using Microsoft.Extensions.Logging;
	using Newtonsoft.Json.Linq;
	using System.Diagnostics;


	namespace azure_ai_services
	{
	/// <summary>
	/// HTTP-triggered Azure Functions that wrap Azure AI Speech operations.
	/// </summary>
	public class CognitiveServices
	{
		// ffmpeg location inside the deployed Function App; used when FFMPEG_PATH is not set.
		private const string DefaultFfmpegPath = @"C:\home\site\wwwroot\executables\ffmpeg.exe";

		// Settings are read from the environment (app settings / local.settings.json) so that
		// no secret has to live in source. The original placeholders remain as fallbacks.
		private static readonly string speechKey = GetSetting("SPEECH_KEY", "YOUR_SPEECH_KEY");
		private static readonly string speechRegion = GetSetting("SPEECH_REGION", "your_speech_region");

		// Set FFMPEG_PATH to point at a local ffmpeg build when testing outside Azure.
		private static readonly string ffmpegPath = GetSetting("FFMPEG_PATH", DefaultFfmpegPath);

		private readonly ILogger<CognitiveServices> _logger;

		public CognitiveServices(ILogger<CognitiveServices> logger)
		{
			_logger = logger;
		}

		//--------------------------------------------------------------------------
		//------ Speech to text ----------------------------------------------------
		//--------------------------------------------------------------------------

		/// <summary>
		/// Transcribes the audio posted in the request body.
		/// The body is saved to a temp file, converted to WAV with ffmpeg and passed to
		/// the Speech SDK for a single-utterance recognition (en-US).
		/// </summary>
		/// <param name="req">HTTP POST request whose body is the raw audio in any format ffmpeg can decode.</param>
		/// <returns>
		/// 200 with a JSON object containing <c>DisplayText</c> and <c>Duration</c>,
		/// or 500 when ffmpeg could not convert the input.
		/// </returns>
		[Function("Stt")]
		public async Task<IActionResult> Run([HttpTrigger(AuthorizationLevel.Anonymous, "post")] HttpRequest req)
		{
			_logger.LogInformation("C# HTTP trigger function processed a request.");

			var tempPath = Path.GetTempPath();
			// For ffmpeg the input file name can be anything; the format is probed from the content.
			var tempIn = Path.Combine(tempPath, Path.GetRandomFileName() + ".tmp");
			var tempOut = Path.Combine(tempPath, Path.GetRandomFileName() + ".wav");

			try
			{
				_logger.LogInformation("File write start: {TempIn}", tempIn);
				// Stream the body straight to disk instead of buffering the whole upload in memory.
				await using (var inputFile = File.Create(tempIn))
				{
					await req.Body.CopyToAsync(inputFile);
				}
				_logger.LogInformation("File write finished: {TempIn}", tempIn);

				if (!await ConvertToWavAsync(tempIn, tempOut))
				{
					return new StatusCodeResult(StatusCodes.Status500InternalServerError);
				}

				// Now comes the interesting part. SPEECH TO TEXT
				var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
				speechConfig.SpeechRecognitionLanguage = "en-US";

				JObject response;
				// Explicit scopes: the recognizer is disposed before its audio source, and both
				// release the WAV file before the finally block deletes it.
				using (var audioConfig = AudioConfig.FromWavFileInput(tempOut))
				using (var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig))
				{
					var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();

					if (speechRecognitionResult.Reason != ResultReason.RecognizedSpeech)
					{
						_logger.LogWarning("STT finished without recognized speech: {Reason}", speechRecognitionResult.Reason);
					}

					_logger.LogInformation("STT Result: {Text}", speechRecognitionResult.Text);

					// Create response payload
					response = new JObject
					{
						{ "DisplayText", speechRecognitionResult.Text },
						{ "Duration", speechRecognitionResult.Duration },
					};
				}

				// Send the payload with an explicit JSON content type; returning the string
				// through OkObjectResult would label it as plain text.
				return new ContentResult
				{
					Content = response.ToString(),
					ContentType = "application/json",
					StatusCode = StatusCodes.Status200OK,
				};
			}
			finally
			{
				// Delete the temp files on success, on failure and on exceptions alike.
				TryDelete(tempOut);
				TryDelete(tempIn);
			}
		}

		/// <summary>Runs ffmpeg to convert <paramref name="inputPath"/> into a WAV file at <paramref name="outputPath"/>.</summary>
		/// <returns><c>true</c> when ffmpeg exited with code 0; otherwise <c>false</c> (stderr is logged).</returns>
		private async Task<bool> ConvertToWavAsync(string inputPath, string outputPath)
		{
			using var process = new Process();
			process.StartInfo.FileName = ffmpegPath;
			process.StartInfo.Arguments = $"-i \"{inputPath}\" \"{outputPath}\"";
			process.StartInfo.RedirectStandardOutput = true;
			process.StartInfo.RedirectStandardError = true;
			process.StartInfo.UseShellExecute = false;

			_logger.LogInformation("Args: {Arguments}", process.StartInfo.Arguments);

			process.Start();

			// Drain both redirected pipes while ffmpeg runs. Waiting for exit first can deadlock:
			// ffmpeg logs to stderr and blocks once the pipe buffer is full.
			var stdOutTask = process.StandardOutput.ReadToEndAsync();
			var stdErrTask = process.StandardError.ReadToEndAsync();
			await Task.WhenAll(stdOutTask, stdErrTask);
			await process.WaitForExitAsync();

			if (process.ExitCode != 0)
			{
				_logger.LogError("FFMPEG failed with exit code {ExitCode}: {Error}", process.ExitCode, await stdErrTask);
				return false;
			}

			_logger.LogInformation("File conversion finished: {TempOut}", outputPath);
			return true;
		}

		/// <summary>Best-effort delete of a temp file; a failure is logged and never thrown from cleanup.</summary>
		private void TryDelete(string path)
		{
			try
			{
				// File.Delete does not throw when the file does not exist.
				File.Delete(path);
			}
			catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
			{
				_logger.LogWarning(ex, "Could not delete temp file: {Path}", path);
			}
		}

		/// <summary>Returns the environment variable <paramref name="name"/>, or <paramref name="fallback"/> when it is unset or blank.</summary>
		private static string GetSetting(string name, string fallback)
		{
			var value = Environment.GetEnvironmentVariable(name);
			return string.IsNullOrWhiteSpace(value) ? fallback : value;
		}
	}
	}