Last active August 29, 2015 14:10
Parse a Heroku log file and do some analysis on it.
#!/usr/bin/env ruby
## Return the arithmetic mean of an array of numbers.
# @param arr [Array<Numeric>] numbers to average
# @return [Float] the mean; NaN (0.0 / 0) when arr is empty
def getAverage(arr)
  # Seeding the fold with 0 makes the empty case explicit: 0.to_f / 0 is NaN.
  arr.inject(0) { |sum, el| sum + el }.to_f / arr.size
end
## Return the median of an array of numbers.
# @param arr [Array<Numeric>] numbers to take the median of (need not be sorted)
# @return [Numeric, String] the middle element for odd sizes, the mean of the
#   two middle elements (as a Float) for even sizes, or the string 'NaN' when
#   arr is empty
def getMedian(arr)
  return 'NaN' if arr.empty?

  sorted = arr.sort
  mid = sorted.size / 2
  return sorted[mid] if sorted.size.odd?

  # Even count: there is no single middle element, so average the two
  # elements that straddle the centre.
  (sorted[mid - 1] + sorted[mid]) / 2.0
end
## Return the mode (most frequent element) of an array.
# @param arr [Array] values to inspect; elements only need to be usable as
#   hash keys, so numbers and strings both work
# @return [Object, String] the most frequent element, or the string 'NaN'
#   when arr is empty
def getMode(arr)
  # Hash.new(0) lets every first-seen value start counting from zero.
  freq = arr.each_with_object(Hash.new(0)) { |v, counts| counts[v] += 1 }
  # max_by yields nil for an empty array, which falls through to 'NaN'.
  arr.max_by { |v| freq[v] } || 'NaN'
end
## Return the sum of the connect time and the service time found in a log line.
# @param line [String] log line expected to contain `connect=<n>ms` and
#   `service=<n>ms`
# @return [Integer] connect + service in milliseconds; a field that is absent
#   or has no numeric value counts as 0 instead of raising on nil
def getTime(line)
  # String#[] with a capture index returns nil on no match, and nil.to_i is 0.
  connect = line[/connect=(\d+)ms/, 1].to_i
  service = line[/service=(\d+)ms/, 1].to_i
  connect + service
end
## Read the log file line by line and fill the per-route statistics.
# Each line is attributed to the first route (in matchRoutes order) whose
# method and path both match; later routes are not tried for that line.
# @param filePath [String] path of the log file to read
# @param data [Hash] per-route stats, mutated in place; data[name] must hold
#   :count (Integer) and :times (Array) for every route name in matchRoutes
# @param dynosArray [Array<String>] mutated in place; receives the dyno number
#   of every matched line that carries `dyno=web.<n>`
# @param matchRoutes [Hash] route name => { method: String, path: Regexp }
# @return [nil]
def readFile(filePath, data, dynosArray, matchRoutes)
  # Build the two regexes per route once, not once per line per route.
  matchers = matchRoutes.map do |routeName, route|
    [routeName, /method=#{route[:method]}/, /path=#{route[:path]}/]
  end

  # File.foreach streams the file and closes it when done.
  File.foreach(filePath) do |line|
    routeName, = matchers.find do |_, method_re, path_re|
      line =~ method_re && line =~ path_re
    end
    next unless routeName

    stats = data[routeName]
    # Increase the number of calls
    stats[:count] += 1
    # Add connect time + service time to the route's times
    stats[:times].push(getTime(line))
    # Record the responding dyno; skip lines that do not name a web dyno
    dyno = line[/dyno=web\.(\d+)/, 1]
    dynosArray.push(dyno) if dyno
  end
end
## Print the statistics of every route, then the most-responding dyno.
# @param data [Hash] route name => { count: Integer, times: Array } to print
# @param dynosArray [Array] dyno identifiers collected across all routes
# @return [nil]
def printData(data, dynosArray)
  data.each do |routeName, stats|
    times = stats[:times]
    puts "======================== #{routeName} ================================"
    puts "#{routeName} count: #{stats[:count]}"
    puts "#{routeName} avg: #{getAverage(times)}ms"
    puts "#{routeName} median: #{getMedian(times)}ms"
    puts "#{routeName} mode: #{getMode(times)}ms"
  end

  # dynosArray is shared by all routes, so the dyno summary is printed once.
  puts "======================== Dyno ================================"
  puts "Dyno that responded most for previous route is: web.#{getMode(dynosArray)}"
end
def init
# Init variables
# Data of Routes
# When you add new route initialize it here
data = {
pendingMessages: { count: 0, times: [] },
getMessages: { count: 0, times: [] },
getFriendsProgress: { count: 0, times: [] },
getFriendsScore: { count: 0, times: [] },
createUser: { count: 0, times: [] },
getUser: { count: 0, times: [] }
# Routes regex
# When you add new route add its regex here
matchRoutes = {
pendingMessages: { method: 'GET', path: /\/api\/users\/\d+\/count_pending_messages\s/ },
getMessages: { method: 'GET', path: /\/api\/users\/\d+\/get_messages\s/ },
getFriendsProgress: { method: 'GET', path: /\/api\/users\/\d+\/get_friends_progress\s/ },
getFriendsScore: { method: 'GET', path: /\/api\/users\/\d+\/get_friends_score\s/ },
createUser: { method: 'POST', path: /\/api\/users\/\d+\s/, },
getUser: { method: 'GET', path: /\/api\/users\/\d+\s/ }
# Responded dynos for previous routes
dynosArray = []
# Get file path
filePath = ARGV[0]
# Read file and fill data
puts "Calculating..."
readFile(filePath, data, dynosArray, matchRoutes)
# Print data
puts "Printing..."
printData(data, dynosArray)
  • Make the file executable: $ chmod +x parseLog.rb
  • Run it with the log file as the first argument: $ ./parseLog.rb sample.log

Github Repo

You can find the final version of this file, together with its tests, in this GitHub repository.

