miyagawa/google-speech2text.rb

## google-speech2text.rb
#!/usr/bin/env ruby
# coding: utf-8
require 'json'

# Kana spellings of a word mapped to the written form (kanji, unit or sign)
# that is also accepted when that word is looked up in the transcript.
SPOKEN_TO_WRITTEN = {
  "じかん" => "時間",
  "じ" => "時",
  "ぱーせんと" => "%",
  "えん" => "円",
  "どる" => "ドル",
  "まん" => "万",
  "ふん" => "分",
  "ぷん" => "分",
  "まえ" => "前",
  "ご" => "後",
  "はん" => "半",
  "おく" => "0000万",
  "まいなす" => "-",
  "の" => "-",
}.freeze

# Regexp source allowed in front of a word right after an unmatched word:
# a run of digits/dots followed by optional whitespace.
# NOTE(review): presumably the unmatched word was a number that the
# transcript spells with digits -- confirm against real responses.
SKIPPED_NUMBER_PREFIX = '[\d\.]*\s*'.freeze

# Number of consecutive unmatched words tolerated before giving up.
MAX_CONSECUTIVE_MISSES = 16

# Builds the regexp source matching one word's surface form.
#
# @param surface [String] surface form taken from a word entry
# @param table [Hash{String=>String}] alternative written forms by surface
# @return [String] escaped regexp source; an alternation when +table+ has
#   an entry for +surface+
def word_pattern(surface, table)
  written = table[surface]
  return Regexp.escape(surface) unless written

  "(?:#{Regexp.escape(surface)}|#{Regexp.escape(written)})"
end

# Walks +words+ in order and strips each one from the front of the
# transcript, reporting where the two stop lining up.
#
# @param transcript [String] transcript text; it is copied, not modified
# @param words [Array<Hash>] word entries; only the "word" key is read. The
#   part before the first "|" is used and "_" is treated as a space.
# @param table [Hash{String=>String}] see SPOKEN_TO_WRITTEN
# @param max_misses [Integer] consecutive unmatched words tolerated
# @return [Array(Hash, String), nil] the word entry at which more than
#   +max_misses+ words in a row failed to match, together with the
#   transcript text not consumed so far; nil if that never happened
def find_misalignment(transcript, words, table: SPOKEN_TO_WRITTEN, max_misses: MAX_CONSECUTIVE_MISSES)
  remaining = transcript.dup
  prefix = ""
  misses = 0

  words.each do |word|
    surface = word["word"].to_s.split("|").first.to_s.tr("_", " ")
    # An entry without a surface form has nothing to line up.
    next if surface.empty?

    # \A (not ^) so that only the very start of the remaining text can
    # match; ^ would also match after any embedded newline.
    pattern = /\A\s*(?:#{prefix}#{word_pattern(surface, table)})\s*/i
    if remaining.sub!(pattern, "")
      prefix = ""
      misses = 0
      next
    end

    prefix = SKIPPED_NUMBER_PREFIX
    misses += 1
    return [word, remaining] if misses > max_misses
  end

  nil
end

# Checks every result of a response file and prints, via +p+, the
# +[word, remaining_text]+ pair for each result whose words could not be
# lined up with its first alternative's transcript.
#
# @param path [String] JSON file with results under "response" => "results"
# @return [void]
# @raise [JSON::ParserError] if the file is not valid JSON
# @raise [SystemCallError] if the file cannot be read
def check_response_file(path)
  # File.read closes the handle; JSON text is UTF-8 regardless of locale.
  data = JSON.parse(File.read(path, encoding: "UTF-8"))
  results = data.dig("response", "results") || []

  results.each do |result|
    best = result.dig("alternatives", 0)
    # Results lacking a transcript or word list cannot be checked.
    next unless best && best["transcript"] && best["words"]

    mismatch = find_misalignment(best["transcript"], best["words"])
    p mismatch if mismatch
  end
end

path = ARGV.shift
if path
  check_response_file(path)
else
  warn "usage: #{File.basename($PROGRAM_NAME)} RESPONSE.json"
end
	#!/usr/bin/env ruby
	# coding: utf-8
	require 'json'

	# Kana spellings of a word mapped to the written form (kanji, unit or sign)
	# that is also accepted when that word is looked up in the transcript.
	SPOKEN_TO_WRITTEN = {
	  "じかん" => "時間",
	  "じ" => "時",
	  "ぱーせんと" => "%",
	  "えん" => "円",
	  "どる" => "ドル",
	  "まん" => "万",
	  "ふん" => "分",
	  "ぷん" => "分",
	  "まえ" => "前",
	  "ご" => "後",
	  "はん" => "半",
	  "おく" => "0000万",
	  "まいなす" => "-",
	  "の" => "-",
	}.freeze

	# Regexp source allowed in front of a word right after an unmatched word:
	# a run of digits/dots followed by optional whitespace.
	# NOTE(review): presumably the unmatched word was a number that the
	# transcript spells with digits -- confirm against real responses.
	SKIPPED_NUMBER_PREFIX = '[\d\.]*\s*'.freeze

	# Number of consecutive unmatched words tolerated before giving up.
	MAX_CONSECUTIVE_MISSES = 16

	# Builds the regexp source matching one word's surface form.
	#
	# @param surface [String] surface form taken from a word entry
	# @param table [Hash{String=>String}] alternative written forms by surface
	# @return [String] escaped regexp source; an alternation when +table+ has
	#   an entry for +surface+
	def word_pattern(surface, table)
	  written = table[surface]
	  return Regexp.escape(surface) unless written

	  "(?:#{Regexp.escape(surface)}|#{Regexp.escape(written)})"
	end

	# Walks +words+ in order and strips each one from the front of the
	# transcript, reporting where the two stop lining up.
	#
	# @param transcript [String] transcript text; it is copied, not modified
	# @param words [Array<Hash>] word entries; only the "word" key is read. The
	#   part before the first "|" is used and "_" is treated as a space.
	# @param table [Hash{String=>String}] see SPOKEN_TO_WRITTEN
	# @param max_misses [Integer] consecutive unmatched words tolerated
	# @return [Array(Hash, String), nil] the word entry at which more than
	#   +max_misses+ words in a row failed to match, together with the
	#   transcript text not consumed so far; nil if that never happened
	def find_misalignment(transcript, words, table: SPOKEN_TO_WRITTEN, max_misses: MAX_CONSECUTIVE_MISSES)
	  remaining = transcript.dup
	  prefix = ""
	  misses = 0

	  words.each do |word|
	    surface = word["word"].to_s.split("|").first.to_s.tr("_", " ")
	    # An entry without a surface form has nothing to line up.
	    next if surface.empty?

	    # \A (not ^) so that only the very start of the remaining text can
	    # match; ^ would also match after any embedded newline.
	    pattern = /\A\s*(?:#{prefix}#{word_pattern(surface, table)})\s*/i
	    if remaining.sub!(pattern, "")
	      prefix = ""
	      misses = 0
	      next
	    end

	    prefix = SKIPPED_NUMBER_PREFIX
	    misses += 1
	    return [word, remaining] if misses > max_misses
	  end

	  nil
	end

	# Checks every result of a response file and prints, via +p+, the
	# +[word, remaining_text]+ pair for each result whose words could not be
	# lined up with its first alternative's transcript.
	#
	# @param path [String] JSON file with results under "response" => "results"
	# @return [void]
	# @raise [JSON::ParserError] if the file is not valid JSON
	# @raise [SystemCallError] if the file cannot be read
	def check_response_file(path)
	  # File.read closes the handle; JSON text is UTF-8 regardless of locale.
	  data = JSON.parse(File.read(path, encoding: "UTF-8"))
	  results = data.dig("response", "results") || []

	  results.each do |result|
	    best = result.dig("alternatives", 0)
	    # Results lacking a transcript or word list cannot be checked.
	    next unless best && best["transcript"] && best["words"]

	    mismatch = find_misalignment(best["transcript"], best["words"])
	    p mismatch if mismatch
	  end
	end

	path = ARGV.shift
	if path
	  check_response_file(path)
	else
	  warn "usage: #{File.basename($PROGRAM_NAME)} RESPONSE.json"
	end