Created
January 9, 2014 16:29
-
-
Save agoodman/8337134 to your computer and use it in GitHub Desktop.
Split one WAV file into many clips of 30 seconds each
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# Submit all WAV clips to AT&T Speech API | |
require 'thread' | |
require 'rest-client' | |
require 'json' | |
# Command-line arguments: <src_hash> <segment_count> <bearer_token>.
# src_hash is the file-name prefix shared by every clip ("<hash>-NN.wav").
src_hash = ARGV[0]
# Number of clips to submit; String#to_i yields 0 for non-numeric input.
index = ARGV[1].to_i
# OAuth bearer token sent in the Authorization header of each request.
bearer_token = ARGV[2]
url = "https://api.att.com/speech/v3/speechToText"
puts "using hash #{src_hash}, count #{index}"
# Submits one WAV clip to the speech-to-text endpoint and stores the JSON
# reply next to it as "<hash>-NN.json".
class Worker
  # @param n [Integer] segment number; zero-padded to two digits in file names
  # @param hash [String] file-name prefix shared by all clips
  # @param url [String] endpoint the clip is POSTed to
  # @param token [String] bearer token for the Authorization header
  def initialize(n, hash, url, token)
    @index = n
    @hash = hash
    @url = url
    @token = token
  end

  # Uploads the clip unless a result file already exists.
  #
  # Any StandardError is reported on stdout and swallowed so that one failing
  # segment cannot take down the thread that runs it.
  #
  # @return [nil]
  def run
    file_name = "#{@hash}-#{format('%02d', @index)}"
    if File.exist?("#{file_name}.json")
      puts "skipping segment #{@index}"
      return
    end
    puts "processing segment #{@index}"

    # Block form closes the WAV handle even when the request raises.
    body = File.open("#{file_name}.wav", 'rb') { |wav| post_audio(wav) }
    if body.to_s.empty?
      puts "API response empty for segment #{@index}"
    else
      File.write("#{file_name}.json", body)
    end
    nil
  rescue => e
    puts "error encountered: #{e}"
    # Only errors that carry an HTTP response (as rest-client's HTTP errors
    # are expected to) have a body worth printing; other errors do not.
    response = e.response if e.respond_to?(:response)
    puts response.body if response
    nil
  end

  private

  # POSTs the open WAV stream and returns the response body.
  def post_audio(wav)
    RestClient::Resource.new(@url).post(
      wav,
      "Accept" => "application/json",
      "Content-Type" => "audio/wav",
      "Authorization" => "Bearer #{@token}"
    ).body
  end
end
# Queue one Worker per segment, then drain the queue with a pool of 10 threads.
queue = Queue.new
processed = Queue.new
(1..index).each do |n|
  queue << Worker.new(n, src_hash, url, bearer_token)
end

threads = Array.new(10) do
  Thread.new do
    loop do
      # Non-blocking pop: checking empty? and then calling a blocking pop lets
      # another thread take the last item in between, leaving this thread
      # blocked forever. ThreadError here means the queue is drained.
      worker = begin
        queue.pop(true)
      rescue ThreadError
        break
      end
      processed << worker.run
    end
  end
end

# Joining (instead of polling processed.length) terminates even when a worker
# thread dies, and surfaces that thread's exception in the main thread.
threads.each(&:join)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
# Ruby JSON processor for results from AT&T Speech API | |
require 'json' | |
# Words excluded from the ranking of meaningful words.
STOP_WORDS = ['the', 'of', 'with', 'for', 'that', 'you', 'and', 'in', 'to', 'a', 'about', 'is', 'I', 'on', 'be', 'like', 'as', 'this', 'but', 'we', 'really', 'have', 'know', 'all', 'if', 'can', 'not', 'they', 'are', 'very', 'because', 'it', 'do', 'them', 'get', 'more', 'will', 'your', 'so', 'how', 'one', 'at', 'something', 'right'].freeze

# Reads "<src_hash>-NN.json" for NN in 1..count and returns the first NBest
# entry's words, each paired with its score, in segment order.
#
# Segments with no result file, no 'Recognition'/'NBest' data, or an empty
# NBest list are skipped. Periods are stripped from every word. A word with no
# matching score is paired with nil.
#
# @param src_hash [String] file-name prefix of the result files
# @param count [Integer] number of segments to look for
# @return [Array<Array(String, Numeric)>] [word, score] pairs
def load_word_scores(src_hash, count)
  (1..count).flat_map do |n|
    path = "#{src_hash}-#{format('%02d', n)}.json"
    unless File.exist?(path)
      warn "no result file #{path}, skipping segment #{n}"
      next []
    end

    recognition = JSON.parse(File.read(path))['Recognition'] || {}
    best = Array(recognition['NBest']).first
    next [] unless best

    words = Array(best['Words']).map { |word| word.delete('.') }
    words.zip(Array(best['WordScores']))
  end
end

# Keeps the words scored strictly above threshold, then drops STOP_WORDS.
#
# @param word_scores [Array<Array(String, Numeric)>] [word, score] pairs
# @param threshold [Numeric] exclusive lower bound on the score
# @return [Array<String>] surviving words, duplicates preserved
def meaningful_words(word_scores, threshold)
  confident = word_scores.select { |_word, score| score && score > threshold }
  confident.map(&:first) - STOP_WORDS
end

# Ranks the distinct words by 0.75 * occurrences + 0.75 * length, descending.
#
# @param words [Array<String>] words, duplicates allowed
# @param limit [Integer] maximum number of entries returned
# @return [Array<Array(String, Integer)>] [word, occurrences] pairs
def top_words(words, limit)
  # Single counting pass; keys keep first-occurrence order, like Array#uniq.
  counts = Hash.new(0)
  words.each { |word| counts[word] += 1 }
  weight = ->(word) { 0.75 * counts[word] + 0.75 * word.length }
  ranked = counts.keys.sort { |a, b| weight[b] <=> weight[a] }
  ranked.take(limit).map { |word| [word, counts[word]] }
end

src_hash = ARGV[0]
index = ARGV[1].to_i
word_scores = load_word_scores(src_hash, index)

puts "transcript:"
puts word_scores.map(&:first).join(" ")

threshold = 0.5
meaningful = meaningful_words(word_scores, threshold)
puts "\n\nfound #{meaningful.uniq.count} unique meaningful words"
puts "top 25:"
puts top_words(meaningful, 25).map { |word, count| "#{count} #{word}" }
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby | |
# Process a WAV file into short clips | |
require 'wavefile' | |
require 'digest' | |
include WaveFile | |
# Read the input file and split it into clips of roughly 30 seconds each,
# written as "<src_hash>-NN.wav".
src_file_name = ARGV[0]
abort "usage: #{$PROGRAM_NAME} <file.wav>" unless src_file_name

# The prefix is derived from the file NAME, not from the file's contents.
src_hash = Digest::SHA1.hexdigest(src_file_name)[0..9]

# NOTE(review): the clip format and the 30 * 32000 threshold both assume the
# source is sampled at 32 kHz; it is not clear that Writer resamples - confirm.
clip_format = Format.new(:mono, :pcm_16, 32000)
samples_per_clip = 30 * 32000

reader = Reader.new(src_file_name)
writer = nil
index = 0 # number of clips written so far
total = 0 # sample count written to the current clip
begin
  loop do
    buffer = reader.read(4096)
    # Open the next clip only once there is audio for it, so a source that
    # ends on a clip boundary (or is empty) leaves no empty trailing file.
    if writer.nil?
      index += 1
      writer = Writer.new("#{src_hash}-#{format('%02d', index)}.wav", clip_format)
    end
    writer.write(buffer)
    total += buffer.samples.count
    next if total < samples_per_clip

    writer.close
    writer = nil
    total = 0
    putc "."
  end
rescue EOFError
  # Raised by reader.read when the source is exhausted: normal termination.
ensure
  # Close both ends on every exit path, including unexpected errors.
  writer.close if writer
  reader.close
end
puts
puts "using src hash #{src_hash}, split into #{index} files"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment