Created
August 13, 2010 14:33
-
-
Save leonid-shevtsov/522974 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'ipaddr'
# Tracks visits on the site internally.
# One of the major demands was determining where the visitor came from, so
# every new visit is passed through a chain of source recognizers.
class Visitor < ActiveRecord::Base
  validates_presence_of :entry_uri
  validates_presence_of :exit_uri
  validates_presence_of :ip

  # Registers a source recognizer for visitors.
  #
  # source_name - name used to look up the recognizer class:
  #               "<source_name>_recognizer" is classified and constantized.
  # options     - passed unchanged to the recognizer's constructor
  #               (nil when omitted).
  #
  # Recognizers are consulted in the order they were registered.
  def self.recognizes_source(source_name, options = nil)
    @@recognizers ||= []
    source_class = "#{source_name}_recognizer".classify.constantize
    @@recognizers << source_class.new(options)
  end

  recognizes_source :direct_visit
  recognizes_source :mail
  recognizes_source :yandex_direct
  recognizes_source :search_engines
  recognizes_source :referring_site, :host => 'marketgid.com', :as => 'marketgid'
  recognizes_source :referring_site, :host => 'ru.redtram.com', :as => 'redtram'

  # Builds a visitor for a fresh visit, lets the registered recognizers
  # determine its source, and counts the entry page as the first page view.
  #
  # Returns the visitor. Raises whatever visit_page! (update_attributes!)
  # raises when the record cannot be saved.
  def self.new_visit!(ip, user_agent, referer_uri, entry_uri)
    visitor = Visitor.new(
      :ip => ip,
      :user_agent => user_agent,
      :referer_uri => referer_uri,
      :entry_uri => entry_uri,
      :pages_count => 0
    )
    recognize_source(visitor)
    visitor.visit_page!(entry_uri)
    visitor
  end

  # Records one more page view: increments pages_count and makes page_uri
  # the current exit page.
  def visit_page!(page_uri)
    update_attributes!(
      :pages_count => pages_count + 1,
      :exit_uri => page_uri
    )
  end

  # Stores the IP address in its integer form.
  #
  # new_ip - a String parsed by IPAddr, or an Integer interpreted as an
  #          IPv4 address.
  #
  # Raises RuntimeError for any other type (including nil).
  def ip=(new_ip)
    if new_ip.is_a? String
      self[:ip] = IPAddr.new(new_ip).to_i
    elsif new_ip.is_a? Integer
      # Round-tripping through IPAddr validates the integer as an IPv4 address.
      self[:ip] = IPAddr.new(new_ip, Socket::AF_INET).to_i
    else
      raise 'IP address should be a string or an integer'
    end
  end

  # Returns up to 15 search queries that led visitors to the given entry URL,
  # most frequent first. The result is cached for one day per URL.
  #
  # url - entry URL; a trailing slash is ignored.
  def self.search_queries_for_url(url)
    # Normalize into a new string instead of mutating the caller's argument
    # (in-place modification would also raise on a frozen string).
    normalized_url = url.gsub(/\/$/, '')
    Rails.cache.fetch("queries_for_#{Digest::MD5.hexdigest(normalized_url)}", :expires_in => 1.day) do
      # NOTE(review): the double-quoted SQL literal and the "_" LIKE wildcard
      # are kept as they were; confirm they suit the database in use.
      find(:all,
        :conditions => ['entry_uri=? AND source LIKE "search_%"', normalized_url],
        :group => 'lower(query)',
        :select => '* ,count(*) as weight',
        :order => 'weight DESC',
        :limit => 15).map { |v| v.query }
    end
  end

  # Runs the registered recognizers in order and stops as soon as one of
  # them has assigned a source to the visitor.
  def self.recognize_source(visitor)
    @@recognizers.each do |recognizer|
      recognizer.recognize(visitor)
      return unless visitor.source.blank?
    end
  end

  before_create :generate_key

  private

  # Assigns a random 16-hex-digit key before the record is created.
  def generate_key
    self[:key] = ActiveSupport::SecureRandom.hex(8)
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'uri'
require 'cgi'
require 'iconv'
# Recognizes visitors whose referer points at one particular site.
class ReferringSiteRecognizer < SourceRecognizer
  # options - Hash (nil is treated as an empty Hash):
  #           :host - host name of the referring site, without "www."
  #           :as   - source label assigned on a match; defaults to :host
  def initialize(options = {})
    # The registration helper may hand over nil when no options are given.
    options ||= {}
    @host = options[:host]
    @as = options.fetch(:as, @host)
  end

  # Sets visitor.source to the configured label when the referer's host is
  # the configured host, with or without a leading "www.".
  # A referer that cannot be parsed leaves the visitor untouched.
  def recognize(visitor)
    # Host names are case-insensitive, and URI.parse keeps the case it was
    # given, so both sides are compared in lower case.
    expected = @host.to_s.downcase
    host = URI.parse(visitor.referer_uri).host.to_s.downcase
    visitor.source = @as if host == expected || host == "www.#{expected}"
  rescue URI::InvalidURIError
    # Deliberately ignored: an unparseable referer simply is not this site.
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'uri'
require 'cgi'
require 'iconv'
# Recognizes visitors that arrived from a known search engine results page
# and extracts the search query they used.
class SearchEnginesRecognizer < SourceRecognizer
  # Known result pages, keyed by "host/path" without a trailing slash.
  #   :engine   - engine name used in the source label ("search_<engine>")
  #   :encoding - encoding of the query parameter, or a lambda that picks
  #               the encoding from the parsed request parameters
  #   :param    - name of the request parameter that carries the query
  @@engines_map = {
    'go.mail.ru/search' => {:engine => 'mail.ru', :encoding => lambda { |p| (p['utf8in'] == ['1'] || p['fr'] == ['oprtb']) ? 'UTF8' : 'CP1251' }, :param => 'q'},
    'meta.ua/search.asp' => {:engine => 'meta.ua', :encoding => 'CP1251', :param => 'q'},
    'nigma.ru/index.php' => {:engine => 'nigma.ru', :encoding => 'UTF8', :param => 's'},
    'nova.rambler.ru/search' => {:engine => 'rambler.ru', :encoding => 'UTF8', :param => 'query'},
    'search.i.ua' => {:engine => 'yandex.ru', :encoding => 'CP1251', :param => 'q'},
    'search.ru.redtram.com' => {:engine => 'redtram.com', :encoding => 'UTF8', :param => 'q'},
    'search.ukr.net/yandex/bs.php' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'search_query'},
    'search.ukr.net/yandex/search.php' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'search_query'},
    'www.bing.com/search' => {:engine => 'bing.com', :encoding => 'UTF8', :param => 'q'},
    'www.google.com.ua/search' => {:engine => 'google.com', :encoding => 'UTF8', :param => 'q'},
    'www.google.com/search' => {:engine => 'google.com', :encoding => 'UTF8', :param => 'q'},
    'www.google.ru/search' => {:engine => 'google.com', :encoding => 'UTF8', :param => 'q'},
    'www.nigma.ru/index.php' => {:engine => 'nigma.ru', :encoding => 'UTF8', :param => 's'},
    'www.yandex.ru/yandsearch' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'text'},
    'yandex.ru/yandsearch' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'text'},
    'yandex.ua/yandsearch' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'text'},
  }

  # Returns the table of known search engine result pages.
  def self.engines
    @@engines_map
  end

  # When the visitor's referer is a known search results page, sets
  # visitor.source to "search_<engine>" and visitor.query to the search
  # query converted to UTF-8 and stripped of surrounding whitespace.
  # Unparseable and unknown referers leave the visitor untouched.
  #
  # Always returns nil.
  def recognize(visitor)
    uri = URI.parse(visitor.referer_uri)
    fullpath = uri.host.to_s + uri.path.to_s.gsub(/\/$/, '')
    engine = @@engines_map[fullpath]
    return unless engine

    # A results page may be referred without any query string at all;
    # uri.query is nil then, which CGI.parse cannot handle.
    params = CGI.parse(uri.query.to_s)
    encoding = engine[:encoding].is_a?(Proc) ? engine[:encoding].call(params) : engine[:encoding]

    # CGI.parse yields an Array of values for every key (empty when the
    # parameter is absent), so a missing parameter gives an empty query.
    query = to_utf8(params[engine[:param]].join, encoding)

    visitor.source = "search_#{engine[:engine]}"
    visitor.query = query.strip
    nil
  rescue URI::InvalidURIError
    # Deliberately ignored: an unparseable referer is not a search engine.
  end

  private

  # Converts the raw query from the declared encoding to UTF-8. When the
  # result is still not valid UTF-8, the text is re-read as CP1251.
  def to_utf8(raw, encoding)
    query = (encoding == 'UTF8') ? raw : Iconv.conv('UTF8', encoding, raw)
    begin
      # A UTF8 -> UTF8 conversion serves purely as a validity check.
      Iconv.conv('UTF8', 'UTF8', query)
    rescue Iconv::Failure
      # Covers illegal sequences as well as a multibyte character cut off
      # at the end of the string. Hoping it's CP1251.
      query = Iconv.conv('UTF8//TRANSLIT//IGNORE', 'CP1251', query)
    end
    query
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment