Skip to content

Instantly share code, notes, and snippets.

@zarkzork
Created May 23, 2013 11:04
Show Gist options
  • Save zarkzork/5635297 to your computer and use it in GitHub Desktop.
Save zarkzork/5635297 to your computer and use it in GitHub Desktop.
Simple file search in Ruby
# Core extension that lets any string enumerate its three-character windows.
class String
  # Enumerates every run of three consecutive characters ("triad") of the
  # receiver, from left to right.
  #
  # The receiver is copied when the method is called and the copy is only
  # read, never modified, so the returned enumerator can be walked any number
  # of times and the method also works on frozen strings.
  #
  # @return [Enumerator] yields one three-character String per position;
  #   yields nothing when the receiver is shorter than three characters
  def triads
    snapshot = dup
    Enumerator.new do |yielder|
      # The last window starts at size - 3; for shorter strings the range is empty.
      (0..snapshot.size - 3).each { |start| yielder << snapshot[start, 3] }
    end
  end
end
# Turns a piece of text into the list of triads produced by its words.
class Tokenizer
  # @param content [String] text to be tokenized
  def initialize(content)
    @content = content
  end

  # Splits the content into words and collects the triads of every word.
  #
  # Any run of characters other than alphanumerics, '.', ':' and '_' is
  # treated as a word separator. Each word contributes the result of
  # String#triads, in order of appearance.
  #
  # @return [Array<String>] flat list of triads for all words
  def triads
    words = @content.gsub(/[^[:alnum:].:_]+/, ' ').split(' ')
    words.flat_map { |word| word.triads.to_a }
  end
end
# Inverted index from triad to the places it occurs, used to find the lines
# in which the triads of a query appear at consecutive positions.
class FileMatcher
  # @param paths [Array<String>] files to read and index
  def initialize(paths)
    @db = build_db(paths)
  end

  # Finds the lines whose triad sequence contains the query's triads at
  # consecutive positions.
  #
  # @param string [String] query text; it is tokenized with Tokenizer
  # @return [Array<Array(String, Integer)>] unique [path, line_num] pairs,
  #   where line_num is the zero-based line index recorded at build time;
  #   empty when the query produces no triads
  def files_for_query(string)
    first, *rest = Tokenizer.new(string).triads
    return [] unless first

    hits = rest.inject(postings(first)) do |candidates, triad|
      # Advance every surviving candidate by one position and keep only those
      # where the next query triad really occurs at that position.
      candidates.map { |path, line_num, index| [path, line_num, index + 1] } & postings(triad)
    end
    hits.map { |path, line_num, _index| [path, line_num] }.uniq
  end

  private

  # Occurrences recorded for +triad+. Uses fetch so that looking up an unknown
  # triad does not trigger the hash's default block and add a key to the index.
  def postings(triad)
    @db.fetch(triad, [])
  end

  # Reads every file and records, for each triad, the [path, line_num, index]
  # triples where it occurs. line_num is the zero-based line index and index
  # is the triad's position within that line's triad list.
  def build_db(paths)
    storage = Hash.new { |hash, key| hash[key] = [] }
    paths.each do |path|
      IO.readlines(path).each_with_index do |raw_line, line_num|
        text = raw_line.chomp.force_encoding('utf-8')
        Tokenizer.new(text).triads.each_with_index do |triad, index|
          storage[triad] << [path, line_num, index]
        end
      end
    end
    storage
  end
end
# Keeps the text of every line of the given files, addressable by path and
# zero-based line index.
class FileIndex
  # @param paths [Array<String>] files to read
  def initialize(paths)
    @index = build_index(paths)
  end

  # @param path [String] one of the paths given to the constructor
  # @param line [Integer] zero-based line index
  # @return [String, nil] the line without its trailing newline, or nil when
  #   the file has no line with that index
  def get_line(path, line)
    @index[path][line]
  end

  private

  # Builds { path => { line_index => line_text } } for all paths. Each line is
  # chomped and tagged as UTF-8.
  def build_index(paths)
    paths.each_with_object({}) do |path, by_path|
      numbered = {}
      IO.readlines(path).each_with_index do |raw, number|
        numbered[number] = raw.chomp.force_encoding('utf-8')
      end
      by_path[path] = numbered
    end
  end
end
require 'find'

# Ties the triad index (FileMatcher) and the line store (FileIndex) together
# for all files under a directory that match a set of name suffixes.
class Engine
  # @param path [String] directory (or file) to scan recursively
  # @param extensions [Array<String>] name suffixes to index, e.g. ['.rb']
  def initialize(path, extensions)
    @paths = get_paths(path, extensions)
    @file_matcher = FileMatcher.new(@paths)
    @file_index = FileIndex.new(@paths)
  end

  # Looks the query up in the triad index and keeps only the candidate lines
  # that actually contain the query text.
  #
  # @param string [String] query text
  # @return [Array<Array(String, Integer, String)>] [path, line_num, line] triples
  def search(string)
    @file_matcher.files_for_query(string).each_with_object([]) do |(path, line_num), results|
      line_content = @file_index.get_line(path, line_num)
      # Literal substring check: the query is plain text, so characters such
      # as '(' or '.' must not be interpreted as regular-expression syntax.
      results << [path, line_num, line_content] if line_content && line_content.include?(string)
    end
  end

  # Recursively collects the regular files under +path+ whose base name ends
  # with one of +extensions+ (all regular files when +extensions+ is empty).
  #
  # The tree is walked in-process instead of through a shell command, so
  # paths containing spaces or shell metacharacters are handled safely, and
  # directories whose names happen to match are skipped.
  #
  # @param path [String] root of the search
  # @param extensions [Array<String>] name suffixes; matched as "*<suffix>"
  # @return [Array<String>] matching file paths; empty when +path+ does not exist
  def get_paths(path, extensions)
    return [] unless File.exist?(path)

    patterns = extensions.map { |extension| "*#{extension}" }
    Find.find(path).select do |candidate|
      next false unless File.file?(candidate)

      name = File.basename(candidate)
      patterns.empty? || patterns.any? { |pattern| File.fnmatch?(pattern, name, File::FNM_DOTMATCH) }
    end
  end
end
# Line-oriented command interpreter: reads one command per line from standard
# input and writes the response to standard output.
#
# Supported commands:
#   init [path [extension ...]]  build the search engine
#   search <query>               print matching lines as "file:line text"
class Server
  # Directory indexed when `init` is given no path.
  DEFAULT_PATH = 'data'
  # Name suffixes indexed when `init` is given no extensions.
  DEFAULT_EXTENSIONS = %w[.rb].freeze

  # Reads a single line from $stdin and runs the command it names. Unknown
  # commands are reported on standard output.
  #
  # @raise [EOFError] when $stdin has no more input (raised by readline)
  def parse_command
    line = $stdin.readline.chomp
    command, arguments = line.split(' ', 2)
    if commands.include?(command)
      # Safe to dispatch dynamically: command was checked against the whitelist.
      send(command, arguments)
    else
      puts "ERROR: no such command: #{command}"
    end
  end

  private

  # Names of the methods that may be invoked from the input stream.
  def commands
    %w[init search]
  end

  # Builds the engine for the requested path and extensions, falling back to
  # the defaults for whatever is missing. +arguments+ is nil when the command
  # was given without any.
  def init(arguments)
    path, *extensions = arguments.to_s.split(' ')
    extensions = DEFAULT_EXTENSIONS if extensions.empty?
    @engine = Engine.new(path || DEFAULT_PATH, extensions)
    puts 'ok'
  end

  # Prints every match for the query, then "ok"; requires a prior `init`.
  # +arguments+ is nil when the command was given without a query.
  def search(arguments)
    unless @engine
      puts 'ERROR: you need to issue init first.'
      return
    end

    results = @engine.search(arguments.to_s)
    puts results.map { |file, line, string| "#{file}:#{line} #{string}" }
    puts 'ok'
  end
end
# Entry point: serve commands from standard input until it is closed.
@server = Server.new
begin
  loop { @server.parse_command }
rescue EOFError
  # parse_command reads with $stdin.readline, which raises EOFError once the
  # input is exhausted; treat that as a normal shutdown instead of a crash.
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment