Skip to content

Instantly share code, notes, and snippets.

@crescentrose
Last active September 11, 2015 20:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save crescentrose/c3158917b6d8ba39c65c to your computer and use it in GitHub Desktop.
Save crescentrose/c3158917b6d8ba39c65c to your computer and use it in GitHub Desktop.
Match words captured from a text file against the Unix dictionary
#!/usr/bin/env ruby
# Usage:
# ./wordcheck.rb --regex "regular expression" [--dictionary "dictionary file"] file
require "optparse"
# Command-line options. :regex is mandatory; :dictionary falls back to the
# system word list when -d/--dictionary is not given.
options = { regex: nil, dictionary: "/usr/share/dict/words" }

parser = OptionParser.new do |opts|
  opts.banner = "Usage: wordcheck.rb [options] file(s)"

  opts.on('-r', '--regex expression', 'Regular expression to search for') do |expression|
    options[:regex] = expression
  end

  opts.on('-d', '--dictionary wordsfile', 'Dictionary file (default: /usr/share/dict/words)') do |path|
    options[:dictionary] = path
  end

  opts.on('-h', '--help', 'Displays Help') do
    puts opts
    exit
  end
end

# parse! strips the recognised switches from ARGV, leaving only the input
# file names behind. Unknown switches or missing switch arguments are reported
# with the usage text instead of an uncaught exception.
begin
  parser.parse!
rescue OptionParser::ParseError => e
  warn e.message
  warn parser
  exit 1
end

# Without a pattern there is nothing to search for: report on stderr and exit
# with a failure status so callers can detect the usage error.
unless options[:regex]
  warn "You must specify a regular expression to search."
  exit 1
end
regex = options[:regex]
dict = options[:dictionary]
words = []

# Lookup table of known words: every line of the dictionary file, lower-cased
# and stripped of surrounding whitespace, maps to true. The default of false
# makes a lookup of an unknown word falsy.
dictionary = Hash.new(false)

begin
  File.foreach(dict) do |entry|
    dictionary[entry.downcase.strip] = true
  end
  puts "dictionary loaded, #{dictionary.length} words available"
rescue StandardError => e
  # `rescue Class => var` is the form that binds the raised exception, so the
  # message below is actually printed when the file is missing or unreadable.
  # StandardError (not Exception) keeps SystemExit and signals untouched.
  warn "Can't read dictionary file #{dict}: #{e.message}"
  exit 1
end
# Compile the user-supplied pattern once, up front. An invalid pattern is
# reported immediately instead of silently making every line fail to match.
begin
  pattern = Regexp.new(regex)
rescue RegexpError => e
  warn "Invalid regular expression #{regex.inspect}: #{e.message}"
  exit 1
end

# Process each remaining command-line argument as an input file: collect the
# capture groups of the first pattern match on every line, look them up in the
# dictionary, and print the hits followed by a per-file summary.
ARGV.each do |arg|
  puts ""
  eligible_lines = 0 # lines on which the pattern matched
  candidates = []    # lower-cased captures gathered from this file only

  begin
    File.foreach(arg) do |line|
      # Lines that are not valid in their encoding cannot be matched; skip
      # them without counting them as eligible.
      next unless line.valid_encoding?

      found = pattern.match(line)
      next unless found

      eligible_lines += 1
      # Optional groups that did not participate in the match are nil.
      candidates.concat(found.captures.compact.map(&:downcase))
    end
  rescue StandardError => e
    # Name the input file that failed (not the dictionary) and move on to the
    # next file without printing a misleading summary.
    warn "Can't read file #{arg}: #{e.message}"
    next
  end

  hits = candidates.select { |word| dictionary[word] }
  hits.each { |word| puts "#{arg}: Match found: #{word}" }
  matches = hits.length

  puts ""
  # Dictionary hits per eligible line, as a percentage; the max() guards
  # against division by zero when no line matched.
  perc = (matches.to_f / [eligible_lines, 1].max * 100).round(4)
  puts "#{arg}: Finished search, #{matches} matches found over #{eligible_lines} eligible lines (#{perc}%)"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment