Created
October 12, 2010 06:07
-
-
Save ovargas27/621738 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Given a time-ordered list of pageviews ("events"),
# determine how many "visits" each user had,
# how long each visit lasted, and how many pageviews occurred during each visit.
# A visit ends when the user has not had another pageview for 1 hour;
# a new pageview after 1 hour starts a new visit (the time between visits is at least 1 hour).
require 'enumerator'
# Core extension: lets any Array be converted into an Event, so a row
# produced by String#split can be wrapped with a single call.
class Array
  # Wraps this array's elements in a new Event.
  #
  # @return [Event] an Event built from the receiver via Event.new(self)
  def to_event
    Event.new(self)
  end
end
# One pageview row, stored as an Array of column values. The accessors
# below read the two columns this script relies on (indexes 2 and 3).
class Event < Array
  # @return [Object, nil] the value at index 2 (the user id column), or nil
  #   when the row has fewer than three columns
  def user_id
    self[2]
  end

  # Timestamp of the pageview.
  #
  # The value at index 3 is converted with to_i, so a missing (nil) or
  # non-numeric string column yields Time.at(0) rather than raising.
  #
  # @return [Time]
  def when
    Time.at(self[3].to_i)
  end

  # Whether the row can be attributed to a user.
  #
  # Besides the literal "NULL" marker, rows whose user id is missing or
  # empty are rejected: a blank input line splits into an empty row, which
  # would otherwise be counted as a pageview for the user `nil`.
  #
  # @return [Boolean]
  def valid?
    id = user_id.to_s # nil.to_s is "", so short rows fall into the empty check
    !id.empty? && id != "NULL"
  end
end
# Per-user visit statistics, keyed by user id.
#
# Each value has the shape:
#   { :visits => [ { :count => Integer, :begin => Time, :end => Time }, ... ] }
# where :count is the number of pageviews in the visit and :begin/:end are
# the times of its first and last pageview.
class UserStadistics < Hash
  # A pageview this many seconds or more after the user's previous one
  # starts a new visit.
  VISIT_TIMEOUT = 3600

  # Records one pageview. Events are expected in time order per user.
  #
  # @param event [#user_id, #when] the pageview; `when` must return a Time
  # @return [UserStadistics] self, so calls can be chained
  def <<(event)
    at = event.when
    visits = (self[event.user_id] ||= { :visits => [] })[:visits]
    current = visits.last

    if current && (at - current[:end]) < VISIT_TIMEOUT
      # Still inside the current visit: extend it.
      current[:count] += 1
      current[:end] = at
    else
      # First pageview for this user, or the gap reached the timeout.
      visits << { :count => 1, :begin => at, :end => at }
    end

    self
  end

  # Reads a tab-separated file and returns its valid rows as events.
  #
  # File.readlines opens and closes the file itself, so no handle is leaked.
  #
  # @param filename [String] path of the file to read
  # @return [Array<Event>] the rows for which `valid?` is true, in file order
  def get_file_hash(filename)
    rows = File.readlines(filename).map { |line| line.strip.split("\t").to_event }
    rows.select { |row| row.valid? }
  end

  # Loads every valid event from +filename+ into the statistics.
  #
  # @param filename [String] path of the file to read
  # @return [Array<Event>] the events that were recorded
  def get_visits(filename)
    get_file_hash(filename).each { |event| self << event }
  end

  # Prints a report of every user's visits to standard output.
  #
  # Lines are emitted with Kernel#p, so each one appears in inspect form
  # (wrapped in double quotes). Note that this method shadows Kernel#print
  # for instances of this class.
  #
  # @return [String] the closing separator line (the value of the last `p`)
  def print
    each do |user_id, stats|
      visits = stats[:visits]
      p "-------------------------------------------------------------"
      p "User ID: #{user_id}"
      p " Visits: #{visits.count} "
      visits.each_with_index do |visit, index|
        p " Visit #{index + 1} - #{visit[:count]} pages views Duration: #{visit[:end] - visit[:begin]} secs"
      end
    end
    p "================================================================"
  end
end
# Script entry point: build the per-user visit statistics from ./events.csv
# (parsed as tab-separated rows by get_file_hash) and print the report.
users = UserStadistics.new
users.get_visits('./events.csv')
users.print
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment