Skip to content

Instantly share code, notes, and snippets.

@ovargas27
Created October 12, 2010 06:07
Show Gist options
  • Save ovargas27/621738 to your computer and use it in GitHub Desktop.
Save ovargas27/621738 to your computer and use it in GitHub Desktop.
# Given a time ordered list of pageviews ("events"),
# determine how many "visits" each user had,
# how long each visit lasted and how many page views occurred during each visit.
# A visit ends when the user has not had another page view for 1 hour,
# a new pageview 1 hour or more after the previous one starts a new visit (the minimum gap between visits is 1 hour).
require 'enumerator'
# Core extension that lets a plain array of row fields be wrapped as an Event.
class Array
  # Builds a new Event holding the same elements as this array.
  #
  # @return [Event] a fresh Event constructed from this array
  def to_event
    Event.new(self)
  end
end
# A single pageview row, stored as an array of raw fields.
#
# Only two positions are interpreted here: index 2 is read as the user id and
# index 3 as the timestamp. The remaining fields are carried along untouched.
class Event < Array
  # Placeholder value that marks a row as having no user.
  NULL_USER_ID = 'NULL'.freeze

  # @return [Object, nil] the raw field at index 2, or nil when the row is
  #   too short to have one
  def user_id
    self[2]
  end

  # @return [Time] the field at index 3 converted with to_i and interpreted
  #   as seconds since the Unix epoch
  def when
    Time.at(self[3].to_i)
  end

  # An event is usable only when it names a user: the id must be present,
  # non-empty, and different from the "NULL" placeholder. Rows produced from
  # blank or truncated lines have a nil id and are rejected here so they are
  # not counted as visits of a nameless user.
  #
  # @return [Boolean]
  def valid?
    id = user_id
    !id.to_s.empty? && id != NULL_USER_ID
  end
end
# Per-user visit statistics, keyed by user id.
#
# Each value has the shape
#   { :visits => [ { :count => Integer, :begin => Time, :end => Time }, ... ] }
# where every inner hash describes one visit: the number of pageviews in it
# and the times of its first and last pageview.
class UserStadistics < Hash
  # A pageview continues the current visit only when it arrives less than
  # this many seconds after the visit's last pageview.
  VISIT_TIMEOUT = 3600

  # Records one pageview. Events for a given user are expected to arrive in
  # time order, because each one is compared only with that user's most
  # recent visit.
  #
  # @param event [#user_id, #when] the pageview; #when must return a Time
  # @return [UserStadistics] self, so calls can be chained
  def <<(event)
    user = event.user_id
    at = event.when
    # fetch is a keyed lookup, so no key array is built for every event.
    entry = fetch(user) { self[user] = { :visits => [] } }
    visits = entry[:visits]
    current = visits.last

    if current && (at - current[:end]) < VISIT_TIMEOUT
      current[:count] += 1
      current[:end] = at
    else
      # First pageview for this user, or the gap reached the timeout.
      visits << { :count => 1, :begin => at, :end => at }
    end
    self
  end

  # Reads a tab-separated file and returns its valid rows as events.
  # Despite the name, the result is an Array, not a Hash.
  #
  # @param filename [String] path of the file to read
  # @return [Array<Event>] one Event per line for which Event#valid? holds
  # @raise [SystemCallError] when the file cannot be opened or read
  def get_file_hash(filename)
    # File.readlines closes the file itself; File.new(...).readlines did not.
    rows = File.readlines(filename).map { |line| Event.new(line.strip.split("\t")) }
    rows.select(&:valid?)
  end

  # Loads every valid event from the file into this collection.
  #
  # @param filename [String] path of the file to read
  # @return [Array<Event>] the events that were added
  def get_visits(filename)
    get_file_hash(filename).each { |event| self << event }
  end

  # Writes a per-user report to standard output. Lines are emitted with
  # Kernel#p, so each one appears in inspect form (wrapped in double quotes).
  #
  # @return [String] the closing separator line
  def print
    each do |user_id, stats|
      visits = stats[:visits]
      p "-------------------------------------------------------------"
      p "User ID: #{user_id}"
      p " Visits: #{visits.count} "
      visits.each_with_index do |visit, index|
        p " Visit #{index + 1} - #{visit[:count]} pages views Duration: #{visit[:end] - visit[:begin]} secs"
      end
    end
    p "================================================================"
  end
end
# Script entry point. Runs only when this file is executed directly, so that
# loading the file from elsewhere does not read a file or print a report.
# The events file may be passed as the first command-line argument; without
# one, ./events.csv is used.
if __FILE__ == $PROGRAM_NAME
  users = UserStadistics.new
  users.get_visits(ARGV.first || './events.csv')
  users.print
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment