Created
October 7, 2019 20:33
-
-
Save carlfm01/fd69a8ca2784837dabf9375d35258953 to your computer and use it in GitHub Desktop.
C# DeepSpeech same file test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using DeepSpeechClient; | |
using DeepSpeechClient.Interfaces; | |
using DeepSpeechClient.Models; | |
using NAudio.Wave; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
namespace CSharpExamples
{
    /// <summary>
    /// Console harness that repeatedly creates a DeepSpeech client, loads the model and
    /// language model, and transcribes the same audio file, printing timings for each run.
    /// </summary>
    class Program
    {
        /// <summary>Number of times the load + inference cycle is repeated.</summary>
        private const int RunCount = 20;

        /// <summary>Beam width passed to <c>CreateModel</c>.</summary>
        private const uint BeamWidth = 500;

        /// <summary>Alpha value passed to <c>EnableDecoderWithLM</c>.</summary>
        private const float LmAlpha = 0.75f;

        /// <summary>Beta value passed to <c>EnableDecoderWithLM</c>.</summary>
        private const float LmBeta = 1.85f;

        /// <summary>Sample rate passed to the speech-to-text calls.</summary>
        private const uint SampleRate = 16000;

        /// <summary>
        /// Get the value of an argument.
        /// </summary>
        /// <param name="args">Argument list.</param>
        /// <param name="option">Key of the argument.</param>
        /// <returns>
        /// The element that immediately follows the first occurrence of <paramref name="option"/>,
        /// or <c>null</c> when the option is absent or is the last element.
        /// </returns>
        static string GetArgument(IEnumerable<string> args, string option)
            => args.SkipWhile(i => i != option).Skip(1).Take(1).FirstOrDefault();

        /// <summary>
        /// Formats a metadata result as multi-line text: the recognized text, the confidence,
        /// the item count, and one line per item.
        /// </summary>
        /// <param name="meta">Metadata returned by the client; may be <c>null</c>.</param>
        /// <returns>The formatted report. A null <paramref name="meta"/> or null item list yields empty text and a count of 0.</returns>
        static string MetadataToString(Metadata meta)
        {
            var nl = Environment.NewLine;
            var items = meta?.Items;

            // string.Join throws ArgumentNullException for a null sequence, so the
            // null case must be handled before joining rather than via "?." alone.
            string text = items == null
                ? string.Empty
                : string.Join("", items.Select(x => x.Character));
            string details = items == null
                ? string.Empty
                : string.Join(nl, items.Select(x => $"Timestep : {x.Timestep} TimeOffset: {x.StartTime} Char: {x.Character}"));
            int itemCount = items == null ? 0 : items.Length;

            return nl + $"Recognized text: {text} {nl}"
                + $"Confidence: {meta?.Confidence} {nl}"
                + $"Item count: {itemCount} {nl}"
                + details;
        }

        /// <summary>
        /// Entry point. Recognized options (each followed by a value): <c>--model</c>,
        /// <c>--alphabet</c>, <c>--lm</c>, <c>--trie</c>, <c>--audio</c> and <c>--extended</c>.
        /// Missing options fall back to "output_graph.pbmm", "alphabet.txt", "lm.binary",
        /// an empty trie path and "arctic_a0024.wav" respectively.
        /// </summary>
        /// <param name="args">Command-line arguments.</param>
        static void Main(string[] args)
        {
            // GetArgument already returns null for an empty argument list, so no length guard is needed.
            string modelPath = GetArgument(args, "--model") ?? "output_graph.pbmm";
            string alphabetPath = GetArgument(args, "--alphabet") ?? "alphabet.txt";
            // The language model is always enabled; "--lm" overrides the default file.
            string lmPath = GetArgument(args, "--lm") ?? "lm.binary";
            string triePath = GetArgument(args, "--trie") ?? "";
            string audioFile = GetArgument(args, "--audio") ?? "arctic_a0024.wav";
            // Extended output is enabled when "--extended" is followed by a non-blank value.
            bool extended = !string.IsNullOrWhiteSpace(GetArgument(args, "--extended"));

            var stopwatch = new Stopwatch();

            for (int run = 0; run < RunCount; run++)
            {
                using (IDeepSpeech sttClient = new DeepSpeech())
                {
                    try
                    {
                        Console.WriteLine("Loading model...");
                        // Restart (not Start) so time from a previous phase or iteration never leaks in.
                        stopwatch.Restart();
                        sttClient.CreateModel(
                            modelPath,
                            aAlphabetConfigPath: alphabetPath,
                            aBeamWidth: BeamWidth);
                        stopwatch.Stop();
                        // ElapsedMilliseconds is the total; Elapsed.Milliseconds is only the 0-999 component.
                        Console.WriteLine($"Model loaded - {stopwatch.ElapsedMilliseconds} ms");

                        Console.WriteLine("Loadin LM...");
                        sttClient.EnableDecoderWithLM(
                            lmPath,
                            triePath,
                            LmAlpha, LmBeta);

                        // NOTE(review): the buffer holds every byte of the file (whatever header the
                        // file has included) and the sample rate is fixed rather than read from the
                        // file's format - confirm this is intended.
                        var waveBuffer = new WaveBuffer(File.ReadAllBytes(audioFile));
                        using (var waveInfo = new WaveFileReader(audioFile))
                        {
                            Console.WriteLine("Running inference....");
                            // Two bytes per 16-bit sample.
                            uint sampleCount = Convert.ToUInt32(waveBuffer.MaxSize / 2);

                            stopwatch.Restart();
                            string speechResult;
                            if (extended)
                            {
                                Metadata metaResult = sttClient.SpeechToTextWithMetadata(waveBuffer.ShortBuffer, sampleCount, SampleRate);
                                speechResult = MetadataToString(metaResult);
                            }
                            else
                            {
                                speechResult = sttClient.SpeechToText(waveBuffer.ShortBuffer, sampleCount, SampleRate);
                            }
                            stopwatch.Stop();

                            Console.WriteLine($"Audio duration: {waveInfo.TotalTime.ToString()}");
                            Console.WriteLine($"Inference took: {stopwatch.Elapsed.ToString()}");
                            Console.WriteLine((extended ? "Extended result: " : "Recognized text: ") + speechResult);
                        }
                        waveBuffer.Clear();
                    }
                    catch (Exception ex)
                    {
                        // Report the failure and continue with the next run.
                        Console.WriteLine(ex.Message);
                    }
                }
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment