# snippet of instantiating a Google Cloud Speech-to-Text client
from google.cloud import speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=22050,
    audio_channel_count=1,
    model='phone_call',  # handles low-quality audio better than the default model
    use_enhanced=True,  # use the enhanced model if one is available
    language_code="en-US",
)
# WER (word error rate) of the Google transcript against a manually checked reference
import jiwer

with open("E:/AlanWatts/dataset/ground_truth.txt") as f:
    ground_truth = f.readlines()

with open("E:/AlanWatts/dataset/transcripts2/6_5_Zen_Mind_for_Beginners_I.mp3_2022-11-19_19_52_45.txt") as f:
    hypothesis = f.readlines()

# normalize both texts the same way before scoring
transformation = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemovePunctuation(),
    jiwer.ExpandCommonEnglishContractions(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.Strip(),
    jiwer.RemoveWhiteSpace(replace_by_space=True),
    jiwer.RemoveMultipleSpaces(),
    jiwer.RemoveEmptyStrings(),
    jiwer.ReduceToSingleSentence(),
    jiwer.ReduceToListOfListOfWords(word_delimiter=" "),
])

# jiwer 2.x API; newer releases rename truth_transform to reference_transform
wer = jiwer.wer(ground_truth,
                hypothesis,
                truth_transform=transformation,
                hypothesis_transform=transformation)
print(wer)
# Measured roughly 0-20% word error rate on just a couple of clips;
# for comparison, Autosub often returned completely incorrect or blank transcriptions.
# Via https://github.com/tikhonova/what_would_alan_watts_say/blob/master/speech_synthesis/4_audio_Google_transcribe_with_multiprocessing.py
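# A rough sketch (not from the original gist) of scoring every transcript in a folder
# against matching reference files, so the WER estimate rests on more than a couple of
# clips; the directory layout and file naming used here are hypothetical.
import glob
import os

wers = []
for hyp_path in glob.glob("E:/AlanWatts/dataset/transcripts2/*.txt"):
    ref_path = os.path.join("E:/AlanWatts/dataset/ground_truth", os.path.basename(hyp_path))
    if not os.path.exists(ref_path):
        continue  # skip clips without a manually checked reference
    with open(ref_path) as f:
        ref = f.readlines()
    with open(hyp_path) as f:
        hyp = f.readlines()
    wers.append(jiwer.wer(ref, hyp,
                          truth_transform=transformation,
                          hypothesis_transform=transformation))

if wers:
    print(f"mean WER over {len(wers)} clips: {sum(wers) / len(wers):.2%}")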