Skip to content

Instantly share code, notes, and snippets.

@leonid-shevtsov
Created August 13, 2010 14:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save leonid-shevtsov/522974 to your computer and use it in GitHub Desktop.
Save leonid-shevtsov/522974 to your computer and use it in GitHub Desktop.
require 'ipaddr'
# Tracks visits to the site internally.
# A key requirement is determining where each visitor came from (the traffic source).
class Visitor < ActiveRecord::Base
  validates_presence_of :entry_uri
  validates_presence_of :exit_uri
  validates_presence_of :ip

  # Registers a source recognizer for this model.
  #
  # The recognizer class is looked up by name: +:mail+ resolves to
  # +MailRecognizer+, +:referring_site+ to +ReferringSiteRecognizer+, etc.
  # One instance is created per call and appended to the shared list, so the
  # order of +recognizes_source+ declarations is the order in which
  # recognizers are tried.
  #
  # source_name - Symbol/String naming the recognizer (without "_recognizer").
  # options     - passed as-is to the recognizer's constructor (nil when omitted).
  def self.recognizes_source(source_name, options = nil)
    @@recognizers ||= []
    source_class = "#{source_name}_recognizer".classify.constantize
    @@recognizers << source_class.new(options)
  end

  recognizes_source :direct_visit
  recognizes_source :mail
  recognizes_source :yandex_direct
  recognizes_source :search_engines
  recognizes_source :referring_site, :host => 'marketgid.com', :as => 'marketgid'
  recognizes_source :referring_site, :host => 'ru.redtram.com', :as => 'redtram'

  # Builds a visitor for a new visit, runs source recognition on it and
  # records the entry page as the first viewed page (which persists the
  # record through +visit_page!+).
  #
  # Returns the Visitor.
  def self.new_visit!(ip, user_agent, referer_uri, entry_uri)
    visitor = Visitor.new(
      :ip => ip,
      :user_agent => user_agent,
      :referer_uri => referer_uri,
      :entry_uri => entry_uri,
      :pages_count => 0
    )
    recognize_source(visitor)
    visitor.visit_page!(entry_uri)
    visitor
  end

  # Records one more page view: increments +pages_count+ and sets +exit_uri+
  # to the page just visited. Saves via +update_attributes!+.
  def visit_page!(page_uri)
    update_attributes!(
      # to_i treats a missing (nil) counter as 0 instead of raising NoMethodError.
      :pages_count => pages_count.to_i + 1,
      :exit_uri => page_uri
    )
  end

  # Stores the IP address as an integer.
  #
  # new_ip - a String parsed by IPAddr, or an Integer interpreted as an
  #          IPv4 address.
  #
  # Raises RuntimeError for any other type; IPAddr raises its own error for
  # values it cannot parse.
  def ip=(new_ip)
    case new_ip
    when String
      self[:ip] = IPAddr.new(new_ip).to_i
    when Integer
      self[:ip] = IPAddr.new(new_ip, Socket::AF_INET).to_i
    else
      raise 'IP address should be a string or an integer'
    end
  end

  # Returns up to 15 search queries that led visitors to +url+, most
  # frequent first. Results are cached for one day, keyed by the MD5 of the
  # URL with a trailing slash removed.
  def self.search_queries_for_url(url)
    # Strip the trailing slash on a copy: the previous gsub! modified the
    # caller's string in place and raised on frozen strings.
    normalized_url = url.gsub(/\/$/, '')
    Rails.cache.fetch("queries_for_#{Digest::MD5.hexdigest(normalized_url)}", :expires_in => 1.day) do
      find(:all,
           :conditions => ['entry_uri=? AND source LIKE "search_%"', normalized_url],
           :group => 'lower(query)',
           :select => '* ,count(*) as weight',
           :order => 'weight DESC',
           :limit => 15).map { |v| v.query }
    end
  end

  # Tries each registered recognizer in registration order and stops as soon
  # as one of them has set a non-blank +source+ on the visitor.
  def self.recognize_source(visitor)
    @@recognizers.each do |recognizer|
      recognizer.recognize(visitor)
      return unless visitor.source.blank?
    end
  end

  before_create :generate_key

  private

  # Assigns a random key (8 random bytes rendered as 16 hex characters)
  # before the record is first saved.
  def generate_key
    self[:key] = ActiveSupport::SecureRandom.hex(8)
  end
end
require 'uri'
require 'cgi'
require 'iconv'
# Recognizes visitors referred by one specific site.
#
# The referer's host must equal the configured host, optionally prefixed
# with "www."; on a match the visitor's source is set to the configured label.
class ReferringSiteRecognizer < SourceRecognizer
  # options - Hash with:
  #           :host - host name of the referring site.
  #           :as   - source label to assign (defaults to :host).
  #           nil is accepted and treated as an empty Hash, because callers
  #           may pass nil explicitly, which bypasses the default value.
  def initialize(options = {})
    options ||= {}
    @host = options[:host]
    @as = options.fetch(:as, @host)
  end

  # Sets +visitor.source+ when the referer's host matches the configured
  # host. Referers that cannot be parsed as a URI are ignored.
  def recognize(visitor)
    expected_host = @host.to_s.downcase
    # Without a configured host there is nothing to match against.
    return if expected_host.empty?

    # Host names are case-insensitive, so compare in lower case.
    referer_host = URI.parse(visitor.referer_uri).host.to_s.downcase
    if referer_host == expected_host || referer_host == "www.#{expected_host}"
      visitor.source = @as
    end
  rescue URI::InvalidURIError
    # Deliberate best effort: an unparseable referer simply isn't recognized.
    nil
  end
end
require 'uri'
require 'cgi'
require 'iconv'
# Recognizes visitors that arrived from a known search engine results page
# and extracts the search query from the referer.
class SearchEnginesRecognizer < SourceRecognizer
  # Known search result pages, keyed by "host + path" (no trailing slash).
  #   :engine   - label used in the visitor's source ("search_<engine>").
  #   :encoding - Iconv name of the query's encoding, or a lambda that
  #               receives the parsed query parameters and returns it.
  #   :param    - name of the query-string parameter holding the search text.
  @@engines_map = {
    'go.mail.ru/search' => {:engine => 'mail.ru',:encoding => lambda{|p| (p['utf8in']==['1'])||(p['fr']==['oprtb']) ? 'UTF8' : 'CP1251'}, :param => 'q'},
    'meta.ua/search.asp' => {:engine => 'meta.ua', :encoding => 'CP1251', :param => 'q'},
    'nigma.ru/index.php' => {:engine => 'nigma.ru', :encoding => 'UTF8', :param => 's'},
    'nova.rambler.ru/search' => {:engine => 'rambler.ru', :encoding => 'UTF8', :param => 'query'},
    'search.i.ua' => {:engine => 'yandex.ru', :encoding => 'CP1251', :param => 'q'},
    'search.ru.redtram.com' => {:engine => 'redtram.com', :encoding => 'UTF8', :param => 'q'},
    'search.ukr.net/yandex/bs.php' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'search_query'},
    'search.ukr.net/yandex/search.php' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'search_query'},
    'www.bing.com/search' => {:engine => 'bing.com', :encoding => 'UTF8', :param => 'q'},
    'www.google.com.ua/search' => {:engine => 'google.com', :encoding => 'UTF8', :param => 'q'},
    'www.google.com/search' => {:engine => 'google.com', :encoding => 'UTF8', :param => 'q'},
    'www.google.ru/search' => {:engine => 'google.com', :encoding => 'UTF8', :param => 'q'},
    'www.nigma.ru/index.php' => {:engine => 'nigma.ru', :encoding => 'UTF8', :param => 's'},
    'www.yandex.ru/yandsearch' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'text'},
    'yandex.ru/yandsearch' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'text'},
    'yandex.ua/yandsearch' => {:engine => 'yandex.ru', :encoding => 'UTF8', :param => 'text'},
  }

  # Sets +visitor.source+ to "search_<engine>" and +visitor.query+ to the
  # UTF-8 search text when the referer is a known search results page.
  # Referers that cannot be parsed as a URI are ignored. Returns nil.
  def recognize(visitor)
    uri = URI.parse(visitor.referer_uri)
    # Host names are case-insensitive; the path is matched as given,
    # minus a trailing slash.
    fullpath = uri.host.to_s.downcase + uri.path.to_s.gsub(/\/$/, '')
    engine = @@engines_map[fullpath]
    return unless engine

    # A referer without a query string has uri.query == nil, which
    # CGI.parse cannot handle; treat it as an empty query string.
    params = CGI.parse(uri.query.to_s)
    # CGI.parse maps every parameter to an array of values (empty when the
    # parameter is absent). join gives the plain text on every Ruby
    # version, whereas Array#to_s only did so on Ruby 1.8.
    raw_query = Array(params[engine[:param]]).join

    encoding = engine[:encoding]
    encoding = encoding.call(params) if encoding.is_a?(Proc)

    visitor.source = "search_#{engine[:engine]}"
    visitor.query = decode_query(raw_query, encoding).strip
    nil
  rescue URI::InvalidURIError
    # Deliberate best effort: an unparseable referer simply isn't recognized.
    nil
  end

  # Returns the map of known search engines (the live Hash, not a copy).
  def self.engines
    @@engines_map
  end

  private

  # Converts +raw_query+ from +encoding+ to UTF-8. If the result is still
  # not valid UTF-8, it is re-read as CP1251, transliterating or dropping
  # characters that cannot be converted.
  def decode_query(raw_query, encoding)
    query = (encoding == 'UTF8') ? raw_query : Iconv.conv('UTF8', encoding, raw_query)
    begin
      # Round-trip through Iconv purely to validate the bytes as UTF-8.
      Iconv.conv('UTF8', 'UTF8', query)
    rescue Iconv::IllegalSequence, Iconv::InvalidCharacter
      # InvalidCharacter covers a multibyte sequence cut off at the end of
      # the input, which previously escaped the rescue.
      #hoping it's CP1251
      query = Iconv.conv('UTF8//TRANSLIT//IGNORE', 'CP1251', query)
    end
    query
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment