# jorgehatccrma/demoLuceneJython.py

## demoLuceneJython.py
"""
This simple Jython script shows how to use Apache Lucene
directly in a Jython script
"""

# your usual Python imports
import sys
from contextlib import contextmanager

# Lucene jars this script needs; putting them on sys.path is what lets
# the org.apache.lucene imports further down resolve.
jars = [
    "lucene-7.1.0/core/lucene-core-7.1.0.jar",
    "lucene-7.1.0/queryparser/lucene-queryparser-7.1.0.jar",
]
sys.path.extend(jars)

# Now that jars are in the path, we can import java code as if it
# was regular Python!
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document
from org.apache.lucene.document import Field
from org.apache.lucene.document import StringField
from org.apache.lucene.document import TextField
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.index import IndexWriter
from org.apache.lucene.index import IndexWriterConfig
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import RAMDirectory


class closing(object):
    """
    Context manager that hands back `thing` unchanged and calls
    `thing.close()` when the `with` block is left, whether it ends
    normally or by raising. Used so Lucene's closeable objects can be
    managed with a `with` statement.
    """

    def __init__(self, thing):
        self._thing = thing

    def __enter__(self):
        return self._thing

    def __exit__(self, exc_type, exc_value, traceback):
        # Returning None lets any in-flight exception keep propagating
        # after the resource has been closed.
        self._thing.close()


def make_index(analyzer):
    """ Build an in-memory inverted index holding the sample books.

    `analyzer` is passed to the IndexWriterConfig used for writing.
    Returns the RAMDirectory containing the index, after the writer
    has been closed.
    """
    # (title, isbn) pairs, added to the index in this order
    books = (
        ("Lucene in Action", "193398817"),
        ("Lucene for Dummies", "55320055Z"),
        ("Managing Gigabytes", "55063554A"),
        ("The Art of Computer Science", "9900333X"),
    )

    directory = RAMDirectory()
    writer_config = IndexWriterConfig(analyzer)

    with closing(IndexWriter(directory, writer_config)) as writer:
        for title, isbn in books:
            entry = Document()
            entry.add(TextField("title", title, Field.Store.YES))
            # isbn goes in a StringField because it must not be tokenized
            entry.add(StringField("isbn", isbn, Field.Store.YES))
            writer.addDocument(entry)

    return directory


def query(querystr, index, analyzer):
    """ Run `querystr` against `index` and print the matching books.

    Query terms that do not name a field are matched against "title".
    Up to ten hits are requested; a count line is printed first, then
    one line per hit with its 1-based rank, stored isbn and stored
    title.
    """
    max_hits = 10

    parser = QueryParser("title", analyzer)
    parsed = parser.parse(querystr)

    # Documents are fetched through the searcher, which needs the reader
    # to still be open, so all printing happens inside the `with` block.
    with closing(DirectoryReader.open(index)) as reader:
        searcher = IndexSearcher(reader)
        hits = searcher.search(parsed, max_hits).scoreDocs

        print("Found {:d} hits.".format(len(hits)))
        for rank, hit in enumerate(hits, 1):
            stored = searcher.doc(hit.doc)
            line = "{:d}. {}\t{}".format(
                rank, stored.get("isbn"), stored.get("title"))
            print(line)


if __name__ == "__main__":

    # The query comes from the first command-line argument; without
    # one, search for "lucene".
    if len(sys.argv) > 1:
        querystr = sys.argv[1]
    else:
        querystr = "lucene"

    # A single analyzer is shared by indexing and searching so that both
    # sides tokenize text the same way.
    analyzer = StandardAnalyzer()

    # build the index, then search it
    index = make_index(analyzer)
    query(querystr, index, analyzer)
	"""
	This simple Jython script shows how to use Apache Lucene
	directly in a Jython script
	"""

	# your usual Python imports
	import sys
	from contextlib import contextmanager

	# Add the Lucene jars to sys.path so the org.apache.lucene imports
	# below can be resolved.
	jars = [
		"lucene-7.1.0/core/lucene-core-7.1.0.jar",
		"lucene-7.1.0/queryparser/lucene-queryparser-7.1.0.jar",
	]
	for jar in jars:
		sys.path.append(jar)

	# Now that jars are in the path, we can import java code as if it
	# was regular Python!
	from org.apache.lucene.analysis.standard import StandardAnalyzer
	from org.apache.lucene.document import Document
	from org.apache.lucene.document import Field
	from org.apache.lucene.document import StringField
	from org.apache.lucene.document import TextField
	from org.apache.lucene.index import DirectoryReader
	from org.apache.lucene.index import IndexWriter
	from org.apache.lucene.index import IndexWriterConfig
	from org.apache.lucene.queryparser.classic import QueryParser
	from org.apache.lucene.search import IndexSearcher
	from org.apache.lucene.store import RAMDirectory


	@contextmanager
	def closing(thing):
		"""
		Simple wrapper to make Lucene's classes appear more pythonic.

		Yields `thing` to the `with` block and calls `thing.close()` when
		the block is left, whether it ends normally or by raising.
		"""
		try:
			yield thing
		finally:
			thing.close()


	def make_index(analyzer):
		""" Create an inverted index to power the search.

		The index is held in a RAMDirectory and filled with four sample
		books, written with an IndexWriter configured with `analyzer`.
		The directory is returned after the writer has been closed.
		"""

		def add_doc(w, title, isbn):
			""" Utility to add "documents" to the index. """
			doc = Document()
			doc.add(TextField("title", title, Field.Store.YES))
			# use a string field for isbn because we don't
			# want it tokenized
			doc.add(StringField("isbn", isbn, Field.Store.YES))
			w.addDocument(doc)

		# create the index
		index = RAMDirectory()

		config = IndexWriterConfig(analyzer)
		# the writer is closed when the `with` block is left
		with closing(IndexWriter(index, config)) as w:
			add_doc(w, "Lucene in Action", "193398817")
			add_doc(w, "Lucene for Dummies", "55320055Z")
			add_doc(w, "Managing Gigabytes", "55063554A")
			add_doc(w, "The Art of Computer Science", "9900333X")

		return index


	def query(querystr, index, analyzer):
		""" Search for the `querystr` in the index.

		Prints a count line followed by one line per hit showing its
		1-based rank, stored isbn and stored title. Up to `hitsPerPage`
		hits are requested.
		"""

		# the "title" arg specifies the default field to use
		# when no field is explicitly specified in the query.
		q = QueryParser("title", analyzer).parse(querystr)

		# search
		hitsPerPage = 10
		with closing(DirectoryReader.open(index)) as reader:
			searcher = IndexSearcher(reader)
			docs = searcher.search(q, hitsPerPage)
			hits = docs.scoreDocs
			# display results (needs reader to be open)
			print("Found {:d} hits.".format(len(hits)))
			for i, hit in enumerate(hits):
				docId = hit.doc
				d = searcher.doc(docId)
				print("{:d}. {}\t{}".format(i + 1, d.get("isbn"), d.get("title")))


	if __name__ == "__main__":

		# Specify the analyzer for tokenizing text.
		# The same analyzer should be used for indexing and searching
		analyzer = StandardAnalyzer()

		# create the index to search
		index = make_index(analyzer)

		# perform a search; the query is the first command-line argument,
		# or "lucene" when none is given
		querystr = sys.argv[1] if len(sys.argv) > 1 else "lucene"
		query(querystr, index, analyzer)