-
-
Save wchen-r7/7c3c61b293b4f78451d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
##
#
# This program converts a collection of WAV files generated by Metasploit's record_mic into
# text files. It also performs a brief analysis of the resulting text and reports which
# files look interesting.
#
# AT&T Speech API
# https://devconnect-api.att.com/sdm/my-free-trial-access
#
# by sinn3r
# twitter.com/_sinn3r
# Metasploit.com
#
##
require 'fileutils'
require 'optparse'
require 'tempfile'
#
# Error raised for WAV files that cannot be processed (for example, when the content is not
# in WAV format). The message is optional, as with any StandardError.
#
class WavError < StandardError
end
#
# Script entry point. The att_speech gem is required here, rather than at the top of the
# file, so that a missing gem produces a readable message instead of a LoadError backtrace.
# When the gem loads successfully, control is handed to main.
#
def init
  begin
    require 'att_speech'
  rescue LoadError
    puts "[x] You don't have att_speech installed"
    return
  end

  main
end
#
# Parses the command-line options found in ARGV (ARGV is consumed by OptionParser#parse!).
#
# Exits with status 0 after printing the usage text when no option is given or when -h is
# used. Aborts with status 1 and a message on stderr when an option is malformed, when the
# API or secret key is missing, when the input folder is missing or is not a directory, or
# when the output folder is missing.
#
# Returns a Hash with the keys :input_dir, :output_dir, :api_key and :secret_key.
#
def parse_args
  opts = {}

  opt = OptionParser.new
  opt.banner = "Usage: #{__FILE__} [options]"
  opt.separator('')
  opt.separator('Options:')

  # Every option takes a mandatory value. Declaring the values as mandatory (no square
  # brackets) makes OptionParser reject "-i" with nothing after it instead of yielding nil.
  opt.on('-i', '--input FOLDER', String, 'Folder that contains the wav files') do |input|
    opts[:input_dir] = input
  end

  opt.on('-o', '--out FOLDER', String, 'Folder that stores the text files') do |output|
    opts[:output_dir] = File.expand_path(output)
  end

  opt.on('-a', '--api STRING', String, 'ATT Speech API Key') do |api|
    opts[:api_key] = api
  end

  opt.on('-s', '--secret STRING', String, 'ATT Speech Secret Key') do |secret|
    opts[:secret_key] = secret
  end

  opt.on_tail('-h', '--help', 'Show usage') do
    puts opt
    exit(0)
  end

  begin
    opt.parse!
  rescue OptionParser::ParseError
    abort("Invalid option. See -h for usage")
  end

  if opts.empty?
    puts opt
    exit(0)
  end

  if opts[:secret_key].to_s.empty? || opts[:api_key].to_s.empty?
    abort("ATT Speech API key and Secret key are required")
  end

  # Without this check a missing -i would turn the WAV search pattern into "/**/**",
  # i.e. a scan of the whole filesystem.
  abort("Input folder is required") if opts[:input_dir].to_s.empty?
  abort("Input isn't a directory") unless File.directory?(opts[:input_dir])
  abort("Output folder is required") if opts[:output_dir].to_s.empty?

  opts
end
#
# Converts WAV data to text using the AT&T Speech API.
#
# data - String holding the raw WAV content to send.
# opts - Hash providing :api_key and :secret_key for the ATTSpeech client.
#
# Returns two values: the confidence and the recognized text, read from the
# 'recognition' => 'n_best' entry of the response.
# Raises RuntimeError when the response contains no such entry.
#
def wav_to_text(data, opts)
  att = ATTSpeech.new({
    :api_key    => opts[:api_key],
    :secret_key => opts[:secret_key]
  })

  # The content type is an ordinary positional argument.
  response = att.speech_to_text(data, 'audio/wav').to_hash

  best = (response['recognition'] || {})['n_best']
  raise 'No recognition result returned by the AT&T Speech API' if best.nil?

  return best['confidence'], best['result_text']
end
#
# Metasploit's record_mic produces a low-quality WAV file. In order to use the AT&T speech API,
# we must meet the WAV requirements:
# * 16 bit PCM WAV, single channel, 8 kHz sampling, or
# * 16 bit PCM WAV, single channel, 16 kHz sampling
#
# input_file - path of the WAV file to convert.
#
# Returns the converted WAV content as returned by load_wav.
# Raises WavError when the converter cannot be run or reports a failure, or when load_wav
# rejects the converted file.
#
def convert_wav(input_file)
  # OSX:
  # afconvert -f WAVE -d LEI16@8000 test.wav -o blah.wav
  # Linux:
  # ffmpeg -i [wav_file] -ar 8000 [new_file]
  #
  # A Tempfile gives the converted audio a unique name, so concurrent runs and input files
  # sharing a basename cannot clobber each other.
  tmp = Tempfile.new([File.basename(input_file, '.*'), '.wav'])
  tmp.close

  # The argument-list form of system bypasses the shell: paths containing spaces or shell
  # metacharacters are passed to afconvert untouched.
  converted = system('afconvert', '-f', 'WAVE', '-d', 'LEI16@8000', input_file, '-o', tmp.path)
  #converted = system('ffmpeg', '-y', '-i', input_file, '-ar', '8000', tmp.path)

  # system returns nil when the command cannot be executed and false on a non-zero exit.
  raise WavError, "Unable to convert #{input_file} (is afconvert installed?)" unless converted

  load_wav(tmp.path)
ensure
  # Remove the temporary file even when the conversion or the load failed.
  tmp.close! if tmp
end
#
# Loads a WAV file.
#
# input_file - path of the file to read (opened in binary mode).
#
# Returns the whole file content as a String.
# Raises WavError when the content does not begin with the "RIFF" marker.
#
def load_wav(input_file)
  # The block form closes the handle even if the read fails.
  buf = File.open(input_file, 'rb') { |f| f.read }

  # Check the very first bytes only; a "RIFF" that merely starts a later line of the data
  # must not be accepted.
  raise WavError, "Not a valid WAV format" unless buf.start_with?('RIFF')

  buf
end
#
# Saves a transcription to "<out_dir>/<name>.txt", where <name> is the base name of fname
# with a trailing ".wav" extension (in any letter case) removed.
#
# text    - content to write.
# fname   - path of the WAV file the text came from; only its base name is used.
# out_dir - destination folder; it is created (with parents) when it does not exist.
#
def save(text, fname, out_dir)
  FileUtils.mkdir_p(out_dir)

  base = File.basename(fname).sub(/\.wav\z/i, '')
  path = File.join(out_dir, "#{base}.txt")

  # The block form closes the handle even if the write fails.
  File.open(path, 'wb') { |f| f.write(text) }
end
#
# Finds all the WAV files under a directory, searching sub-directories as well.
#
# input_dir - folder to search.
#
# Returns a sorted Array of absolute paths of the regular files whose extension is ".wav"
# (in any letter case). Directories that happen to be named "*.wav" are skipped.
#
def enum_wavs(input_dir)
  wavs = Dir[File.join(input_dir, '**', '*')].select do |path|
    File.file?(path) && File.extname(path).casecmp('.wav') == 0
  end

  # Sorting makes the processing order independent of the platform's directory order.
  wavs.map { |path| File.expand_path(path) }.sort
end
#
# Analyzes a transcription. If the data is interesting, return true, otherwise false.
#
# data - transcribed text (nil is treated as empty text).
#
# The text is interesting when it contains any of the keywords below as a substring. The
# comparison ignores letter case, and a hyphenated keyword also matches when the text has a
# space in place of the hyphen ("social-security" matches "social security").
#
def analyze(data)
  keywords = %w{
    password username pass social-security pin-number passcode key passphrase metasploit
    lastpass keypass keychain certificate account vpn birthday administrator admin
  }

  text = data.to_s.downcase

  keywords.any? do |keyword|
    text.include?(keyword) || text.include?(keyword.tr('-', ' '))
  end
end
#
# Main
#
# Parses the options, then for every WAV file found under the input folder: converts the
# audio, sends it for transcription, flags the file when analyze finds the text interesting,
# and saves the text to the output folder. A file that fails to convert or to transcribe is
# reported and skipped; the remaining files are still processed.
#
def main
  opts = parse_args
  wavs = enum_wavs(opts[:input_dir])

  wavs.each do |w|
    puts "[*] Converting WAV quality: #{w}"
    begin
      data = convert_wav(w)
    rescue WavError, SystemCallError => e
      # SystemCallError covers OS-level failures (missing converter, unreadable file), so
      # one bad file does not abort the whole batch.
      puts "[x] Error: #{e.message}"
      next
    end

    puts "[*] Translating to text..."
    begin
      confidence, text = wav_to_text(data, opts)
      puts "[*] #{confidence} confidence: \"#{text[0,100]}...\""
    rescue StandardError => e
      # Only StandardError is rescued: Ctrl-C (Interrupt) and exit requests must stop the
      # run instead of being reported as a per-file error.
      puts "[x] Error: #{e.message}"
      next
    end

    puts "[!] Interesting file: #{w}" if analyze(text)

    puts "[*] Saving to file..."
    save(text, w, opts[:output_dir])
  end
end
# Run the tool only when this file is executed directly, so that it can also be loaded
# (for example from irb) without side effects.
init if __FILE__ == $0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment