Skip to content

Instantly share code, notes, and snippets.

@zerowidth
Created February 24, 2011 03:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zerowidth/841677 to your computer and use it in GitHub Desktop.
Save zerowidth/841677 to your computer and use it in GitHub Desktop.
comparison of mongo's grep vs. pcregrep on 400+MB of log files
# Gemfile for the log-loading and log-searching scripts.
# The explicit HTTPS URL replaces the deprecated `:rubygems` shorthand, which
# Bundler warns about because it resolves to an insecure HTTP source.
source "https://rubygems.org"

gem "bson_ext"
gem "mongo"
gem "logging"
# Prints how many documents in the "logs" collection of the "f3h" database
# have a "line" field matching the given pattern, case-insensitively.
#
# Usage: ruby <this script> PATTERN
require "rubygems"
require "bundler"
Bundler.require(:default)

pattern = ARGV.first
# Without an argument Regexp.new(nil) would raise a bare TypeError; print a
# usage hint and exit non-zero instead.
abort "usage: #{$PROGRAM_NAME} PATTERN" if pattern.nil?

db = Mongo::Connection.new("localhost").db("f3h")
logs = db.collection("logs")

# Regexp::IGNORECASE is what the previous positional `true` option meant.
matcher = Regexp.new(pattern, Regexp::IGNORECASE)
cursor = logs.find({ "line" => { "$regex" => matcher } }, :limit => 0)
puts cursor.count
# Loads every *.log file found under the current directory into the "logs"
# collection of the "f3h" MongoDB database, storing one document per line.
#
# Files are read as ISO-8859-1 and each line is converted to UTF-8 before it
# is inserted.
require "rubygems"
require "bundler"
Bundler.require(:default)

logger = Logging.logger(STDOUT)
logger.level = :info

db = Mongo::Connection.new("localhost").db("f3h")
logs = db.collection("logs")

start = Time.now
logger.info "starting load..."

# Sorted so files are loaded in a stable, predictable order.
Dir.glob("**/*.log").sort.each do |logfile|
  logger.info "loading #{logfile}"
  File.open(logfile, "r:ISO-8859-1") do |input|
    input.each_line do |line|
      # String#encode replaces Iconv, which is no longer part of the standard
      # library (removed in Ruby 2.0). The line is already tagged ISO-8859-1
      # by the open mode above, so this transcodes it to UTF-8.
      line = line.encode("UTF-8")
      # timestamp = /^\[([^\]]+)\]/.match(line)[1]
      # time = Time.strptime timestamp, "%m/%d/%y %H:%M:%S"
      logs.insert :line => line
    end
  end
end

logger.info "done loading, took #{Time.now - start} seconds"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment