Skip to content

Instantly share code, notes, and snippets.

@daramcq
Created October 9, 2013 22:17
Show Gist options
  • Save daramcq/6909467 to your computer and use it in GitHub Desktop.
Save daramcq/6909467 to your computer and use it in GitHub Desktop.
Search engine using xapian
import sys
import xapian
import string
from config_file import *
def indexThread(thread_id, content):
    """Add one thread to the Xapian full-text index.

    The text is stored as the document data, indexed with an English
    stemmer, and ``thread_id`` is stored in value slot ``id_global`` so that
    it can be read back from search results.

    ``db_global`` (database path) and ``id_global`` (value slot number) are
    not defined in this function; they are expected to come from
    ``from config_file import *``.

    Args:
        thread_id: Identifier stored with the document. It is passed to
            ``Document.add_value`` unchanged.
            NOTE(review): Xapian values are strings -- confirm callers do
            not pass an int here.
        content: Text of the thread to store and index.

    Returns:
        None.

    Side effects:
        * A failure while building or adding the document is printed to
          stdout and otherwise ignored (best effort, as before).
        * A failure opening, committing or closing the database is reported
          on stderr and terminates the process with exit status 1.
    """
    try:
        # Open the database for update, creating a new database if necessary.
        database = xapian.WritableDatabase(db_global, xapian.DB_CREATE_OR_OPEN)
        try:
            indexer = xapian.TermGenerator()
            indexer.set_stemmer(xapian.Stem("english"))
            try:
                doc = xapian.Document()
                doc.set_data(content)
                doc.add_value(id_global, thread_id)
                indexer.set_document(doc)
                indexer.index_text(content)
                database.add_document(doc)
            except Exception as e:
                # Best effort: a document that cannot be indexed is reported
                # but must not abort the caller.
                print(str(e))
        finally:
            # Commit pending changes and release the write lock now rather
            # than whenever the object happens to be garbage collected.
            database.close()
    except Exception as e:
        # sys.stderr.write works on both Python 2 and 3, unlike `print >>`.
        sys.stderr.write("Exception: %s\n" % str(e))
        sys.exit(1)
import sys
import xapian
from config_file import *
def search(terms):
    """Run a full-text query against the Xapian index and return the top hits.

    ``db_global`` (database path) and ``id_global`` (value slot holding the
    thread id) are not defined in this function; they are expected to come
    from ``from config_file import *``.

    Args:
        terms: Iterable of query strings. They are joined with single spaces
            so that simple multi-word queries do not need to be quoted by
            the caller.

    Returns:
        A list of at most 10 dicts, in the order Xapian returns the matches,
        each with the keys:
            "id":      the value stored in slot ``id_global``,
            "content": the data stored on the document,
            "rank":    1-based rank of the match.
        The list is empty when nothing matches.

    Raises:
        Any error raised while opening the database or parsing/running the
        query propagates unchanged, so the caller can see what failed.
    """
    # Open the database for searching and start an enquire session.
    database = xapian.Database(db_global)
    enquire = xapian.Enquire(database)

    query_string = ' '.join(terms)

    # Parse the query string to produce a Xapian::Query object.
    qp = xapian.QueryParser()
    qp.set_stemmer(xapian.Stem("english"))
    qp.set_database(database)
    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
    query = qp.parse_query(query_string)

    # Find the top 10 results for the query.
    enquire.set_query(query)
    matches = enquire.get_mset(0, 10)

    results = []
    for m in matches:
        document = m.document
        results.append({
            "id": document.get_value(id_global),
            # The stored text lives on the document, not on the match item.
            "content": document.get_data(),
            # Xapian ranks start at 0; expose them 1-based.
            "rank": m.rank + 1,
        })
    return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment