Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Script for comparing the different ways of "boosting" queries in Solr
#!/usr/bin/env python
#
# Simple script for comparing the various ways of "boosting" queries in Solr,
# combined with the three main query parsers (lucene, dismax, and edismax).
# The idea is to generate md5sums of the search results, so I can quickly
# figure out which boost methods work the same as the other ones.
#
import hashlib
import json
import md5
import re
import urllib
import urllib2
# Base URL of the Solr instance the queries are sent to.
solr = 'http://localhost:8081'

# Query parsers to compare.
# Others that could be added: 'boost', 'field', 'func', 'frange'
parsers = [
    'lucene',
    'dismax',
    'edismax',
]

# Boost expression under test and the user query it is combined with.
boost = 'log(relevancy_score)'
query = 'diabetes'

# Request parameters sent with every query, whatever the parser or
# boost method in use.
base_params = {
    'echoParams': 'none',
    'rows': 1000,
    'omitHeader': 'true',
    'qf': 'text',
    'debugQuery': 'on',
    'wt': 'json',
}
def boost_query(boost_method, query, boost_func=None):
    """Build the request parameters for one way of boosting a query.

    Args:
        boost_method: Integer selecting how the boost is expressed:
            0 - no boost, only ``q`` is set
            1 - ``{!boost b=<boost>}`` prefixed to the query in ``q``
            2 - ``{!boost b=$myboost v=$myquery}`` in ``q``, with the boost
                and query passed as the ``myboost`` / ``myquery`` parameters
            3 - boost passed as the ``bq`` parameter
            4 - boost passed as the ``bf`` parameter
            anything else (5 in this script) - boost passed as the
                ``boost`` parameter
        query: The user query string.
        boost_func: Boost expression to use. When omitted, the module-level
            ``boost`` setting is used.

    Returns:
        A dict of request parameters to merge into the query parameters.
    """
    if boost_method == 0:
        # No boost requested, so the boost expression is never looked up.
        return {'q': query}

    if boost_func is None:
        boost_func = boost

    if boost_method == 1:
        return {'q': '{!boost b=%s}%s' % (boost_func, query)}
    if boost_method == 2:
        return {
            'q': '{!boost b=$myboost v=$myquery}',
            'myboost': boost_func,
            'myquery': query,
        }
    if boost_method == 3:
        return {'q': query, 'bq': boost_func}
    if boost_method == 4:
        return {'q': query, 'bf': boost_func}
    # Method 5 and any unrecognised value use the ``boost`` parameter.
    return {'q': query, 'boost': boost_func}
# For every parser / boost-method pair, run the query and print an md5
# fingerprint of the results together with the parsed query reported in the
# debug output, so that equivalent boost methods can be spotted by eye.
for parser in parsers:
    print(parser)
    for boost_method in range(6):
        # Later updates win: base_params override defType-level defaults,
        # and the boost parameters override both.
        params = {'defType': parser}
        params.update(base_params)
        boost_params = boost_query(boost_method, query)
        params.update(boost_params)
        url = solr + '/select?' + urllib.urlencode(params)

        # Close the HTTP response explicitly instead of leaving it to the
        # garbage collector; one request is made per loop iteration.
        connection = urllib2.urlopen(url)
        try:
            response = json.loads(connection.read())
        finally:
            connection.close()

        docs = response['response']
        parsed_query = response['debug']['parsedquery']
        # hashlib replaces the deprecated standalone md5 module.
        md5sum = hashlib.md5(str(docs)).hexdigest()
        print(" %s: %s (%s)" % (boost_method, md5sum, boost_params))
        print(" " + str(parsed_query))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.