Skip to content

Instantly share code, notes, and snippets.

@romanegloo
Created February 2, 2019 17:42
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save romanegloo/c0f1adae22460d7c624d25945ad19ea8 to your computer and use it in GitHub Desktop.
Save romanegloo/c0f1adae22460d7c624d25945ad19ea8 to your computer and use it in GitHub Desktop.
Indexing using PyLucene, example code
'''
Indexing using PyLucene, example code
'''
import os
from pathlib import Path
import lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.index import (IndexOptions, IndexWriter,
IndexWriterConfig)
from org.apache.lucene.store import MMapDirectory
env = lucene.initVM(vmargs=['-Djava.awt.headless=true'])
fsDir = MMapDirectory(Paths.get('index'))
writerConfig = IndexWriterConfig(StandardAnalyzer())
writer = IndexWriter(fsDir, writerConfig)
print(f"{writer.numDocs()} docs found in index")
# Define field type
t1 = FieldType()
t1.setStored(True)
t1.setIndexOptions(IndexOptions.DOCS)
t2 = FieldType()
t2.setStored(False)
t2.setIndexOptions(IndexOptions.DOCS_AND_FREQS)
# Add a document
doc = Document()
doc.add(Field('id', '418129481', t1))
doc.add(Field('title', 'Lucene in Action', t1))
doc.add(Field('text', 'Hello Lucene, This is the first document', t2))
writer.addDocument(doc)
print(f"{writer.numDocs()} docs found in index")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment