Skip to content

Instantly share code, notes, and snippets.

@robrocker7
Created August 2, 2012 20:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save robrocker7/3240573 to your computer and use it in GitHub Desktop.
Save robrocker7/3240573 to your computer and use it in GitHub Desktop.
import re

try:  # Python 3
    from urllib.parse import parse_qs, urlsplit
except ImportError:  # Python 2
    from urlparse import parse_qs, urlsplit


class SearchEngineReferrerMiddleware(object):
    """
    Record which search engine (if any) referred the visitor.

    On every request the ``Referer`` header is parsed and three session
    keys are written: ``search_engine``, ``search_domain`` and
    ``search_term``. Each is ``None`` when it could not be determined.

    Usage example:
    ==============
    {% if request.session.search_engine %}
    You searched for {{ request.session.search_term }} using {{ request.session.search_engine }}.
    {% endif %}
    """

    # Engine name -> name of the query-string parameter carrying the search term.
    SEARCH_PARAMS = {
        'AltaVista': 'q',
        'Ask': 'q',
        'Google': 'q',
        'Live': 'q',
        'Lycos': 'query',
        'MSN': 'q',
        'Yahoo': 'p',
        'Cuil': 'q',
    }

    # Network-location pattern; ``%s`` is replaced with the engine name.
    # The global flags (ignore-case, verbose) must lead the pattern: trailing
    # inline flags are rejected by newer versions of the ``re`` module.
    NETWORK_RE = r"""(?ix)^
        (?P<subdomain>[-.a-z\d]+\.)?
        (?P<engine>%s)
        (?P<top_level>(?:\.[a-z]{2,3}){1,2})
        (?P<port>:\d+)?
        $"""

    def __init__(self, get_response=None):
        """
        Store the optional ``get_response`` callable.

        The argument is optional so the class can still be instantiated
        without arguments and driven only through ``process_request``.
        """
        self.get_response = get_response

    def __call__(self, request):
        """Run ``process_request`` and then delegate to ``get_response``."""
        self.process_request(request)
        return self.get_response(request)

    @classmethod
    def parse_search(cls, url):
        """
        Extract the search engine, domain, and search term from `url`
        and return them as (engine, domain, term). For example,
        ('Google', 'www.google.co.uk', 'django framework'). Note that
        the search term will be converted to lowercase and have normalized
        spaces.

        The first tuple item will be None if the referrer is not a
        search engine. The term is None when the engine's query parameter
        is absent. (None, None, None) is returned when `url` is None or
        cannot be parsed at all.
        """
        if url is None:
            return (None, None, None)
        try:
            parsed = urlsplit(url)
            network = parsed[1]
            query = parsed[3]
        except (AttributeError, IndexError, TypeError, ValueError):
            # ValueError covers malformed network locations such as an
            # unterminated IPv6 literal; the referrer is client-supplied,
            # so it must never be allowed to raise out of here.
            return (None, None, None)

        for engine, param in cls.SEARCH_PARAMS.items():
            # Escape the name so it is always matched literally, even
            # inside a verbose pattern.
            if not re.match(cls.NETWORK_RE % re.escape(engine), network):
                continue
            values = parse_qs(query).get(param)
            term = None
            if values and values[0]:
                # Collapse runs of whitespace and lowercase the term.
                term = ' '.join(values[0].split()).lower()
            return (engine, network, term)
        return (None, network, None)

    def process_request(self, request):
        """
        Parse the request's ``HTTP_REFERER`` and store the result in the
        session under ``search_engine``, ``search_domain`` and
        ``search_term``.
        """
        # NOTE(review): the keys are overwritten on every request, so the
        # values only describe the most recent referrer -- confirm this is
        # the intended lifetime.
        referrer = request.META.get('HTTP_REFERER')
        engine, domain, term = self.parse_search(referrer)
        request.session['search_engine'] = engine
        request.session['search_domain'] = domain
        request.session['search_term'] = term
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment