Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Preprocess Google Speech-to-Text word offsets
#!/usr/bin/env ruby
# coding: utf-8
require 'json'
# Spoken-form token => written form.
#
# When a recognized word has an entry here, the matching code below accepts
# either the spoken form (the key) or the written form (the value) at the
# front of the transcript.
table = {
  "じかん" => "時間",     # hours
  "じ" => "時",           # o'clock
  "ぱーせんと" => "%",    # percent
  "えん" => "円",         # yen
  "どる" => "ドル",       # dollar
  "まん" => "万",         # ten thousand
  "ふん" => "分",         # minutes
  "ぷん" => "分",         # minutes (voiced reading)
  "まえ" => "前",         # before
  "ご" => "後",           # after
  "はん" => "半",         # half
  # NOTE(review): presumably 億 (10^8) appears in transcripts as digits
  # followed by "0000万" -- confirm against real responses.
  "おく" => "0000万",
  "まいなす" => "-",      # minus
  # NOTE(review): presumably "の" read aloud between digits where the
  # transcript has a hyphen -- confirm.
  "の" => "-",
}
# Check that the per-word entries of a speech-to-text response line up with
# each result's transcript.
#
# Usage: <this script> RESPONSE.json
#
# For every result, the words of the first alternative are removed, in order,
# from the front of that alternative's transcript (the string is modified in
# place). A word that cannot be found at the front is skipped, and the next
# match is allowed to swallow a run of digits/dots first. After more than 16
# consecutive misses the offending word and the remaining transcript are
# printed and the rest of that result is abandoned.
path = ARGV.shift
abort "usage: #{File.basename($PROGRAM_NAME)} RESPONSE.json" unless path

# File.read closes the file once it has been read; a bare File.open without a
# block would leave the handle open for the life of the process.
r = JSON.parse(File.read(path))
r["response"]["results"].each do |result|
  alternative = (result["alternatives"] || []).first
  next unless alternative

  text = alternative["transcript"]
  # Tolerate alternatives that carry no per-word entries.
  words = alternative["words"] || []
  # Regex prefix that lets the next match skip digits left behind by the
  # preceding unmatched word(s); empty while words are matching normally.
  leftover = ""
  # Number of consecutive words that could not be matched.
  count = 0
  words.each do |word|
    # Only the part before the first "|" is compared; "_" stands for a space.
    spoken = word["word"].to_s.split("|").first.to_s.gsub('_', ' ')
    written = table[spoken]
    pattern = Regexp.escape(spoken)
    # Accept either the spoken form or its written replacement.
    pattern = "(?:#{pattern}|#{Regexp.escape(written)})" if written

    # \A (not ^) so the word must sit at the very start of what is left of
    # the transcript, never at the start of a later line.
    if text.sub!(/\A\s*#{leftover}#{pattern}\s*/i, '')
      leftover = ""
      count = 0
      next
    end

    leftover = '[\d\.]*\s*'
    count += 1
    next unless count > 16

    # Too many misses in a row: dump the state for inspection and give up on
    # this result.
    p [word, text]
    break
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment