Skip to content

Instantly share code, notes, and snippets.

@ColadaFF
Created October 10, 2015 14:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ColadaFF/1d6557ebaa147753bc9f to your computer and use it in GitHub Desktop.
Save ColadaFF/1d6557ebaa147753bc9f to your computer and use it in GitHub Desktop.
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
/**
 * Builds a Lucene index from a JSON file whose top-level value is an array of objects.
 *
 * <p>Every key of every JSON object becomes one stored, tokenized field of the corresponding
 * Lucene document. Stop words are read from {@link #STOPWORDS_FILE_PATH}. The index is always
 * opened with {@code OpenMode.CREATE}, i.e. an existing index at the target path is replaced.
 *
 * <p>Typical use is {@link #createIndex()}; the individual steps ({@link #parseJSONFile()},
 * {@link #openIndex()}, {@link #addDocuments(JSONArray)}, {@link #finish()}) stay public for
 * callers that want to drive them manually.
 */
public class LuceneIndexWriter {
    static final String INDEX_PATH = "indexDir";
    static final String JSON_FILE_PATH = "docs.json";
    static final String STOPWORDS_FILE_PATH = "stopwords.txt";

    /** Field configuration shared by every indexed field; built once and frozen. */
    private static final FieldType BODY_OPTIONS = buildBodyOptions();

    String indexPath;
    String jsonFilePath;
    IndexWriter indexWriter = null;

    /** Directory backing {@link #indexWriter}; tracked so {@link #finish()} can release it. */
    private Directory directory = null;

    /**
     * @param indexPath    file-system directory in which the index is created
     * @param jsonFilePath path of the JSON file to index
     */
    public LuceneIndexWriter(String indexPath, String jsonFilePath) {
        this.indexPath = indexPath;
        this.jsonFilePath = jsonFilePath;
    }

    /**
     * Parses the JSON file, opens the index, adds every JSON object as a document and closes
     * the index again.
     *
     * <p>The JSON file is parsed <em>before</em> the index is opened, so an unreadable or
     * malformed input file does not wipe an existing index. If parsing or opening fails, the
     * problem is reported on {@code System.err} and the method returns without indexing.
     *
     * @throws FileNotFoundException if the JSON file cannot be opened
     */
    public void createIndex() throws FileNotFoundException {
        JSONArray jsonObjects = parseJSONFile();
        if (jsonObjects == null) {
            System.err.println("Could not read a JSON array from " + jsonFilePath
                    + "; the index was not modified.");
            return;
        }
        if (!openIndex()) {
            // openIndex() has already reported the cause.
            return;
        }
        try {
            addDocuments(jsonObjects);
        } finally {
            // Always release the writer (and its write lock), even if indexing blew up.
            finish();
        }
    }

    /**
     * Reads {@link #jsonFilePath} as UTF-8 and parses it with json-simple.
     *
     * @return the parsed array, or {@code null} when the content is not valid JSON or its
     *         top-level value is not an array
     * @throws FileNotFoundException if the JSON file cannot be opened
     */
    public JSONArray parseJSONFile() throws FileNotFoundException {
        // Opened outside the try-with-resources so FileNotFoundException still reaches callers.
        InputStream jsonFile = new FileInputStream(jsonFilePath);
        Object fileObjects = null;
        try (Reader readerJson = new InputStreamReader(jsonFile, StandardCharsets.UTF_8)) {
            // JSONValue.parse reports malformed input by returning null rather than throwing.
            fileObjects = JSONValue.parse(readerJson);
        } catch (IOException closeFailure) {
            // Only close() can fail here; whatever was parsed is still usable.
            System.err.println("Error closing " + jsonFilePath + ": " + closeFailure.getMessage());
        }
        return (fileObjects instanceof JSONArray) ? (JSONArray) fileObjects : null;
    }

    /**
     * Opens an {@link IndexWriter} on {@link #indexPath} in {@code CREATE} mode, using a
     * {@link StandardAnalyzer} configured with the stop words from {@link #STOPWORDS_FILE_PATH}.
     *
     * @return {@code true} if the writer is ready for use; {@code false} if anything failed
     *         (the cause is printed to {@code System.err})
     */
    public boolean openIndex() {
        Directory dir = null;
        try {
            StandardAnalyzer analyzer = loadAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_46, analyzer);
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            dir = FSDirectory.open(new File(indexPath));
            indexWriter = new IndexWriter(dir, iwc);
            directory = dir;
            return true;
        } catch (IOException | RuntimeException e) {
            System.err.println("Error opening the index. " + e.getMessage());
            // Do not leak the directory when the writer could not be created on top of it.
            closeQuietly(dir);
        }
        return false;
    }

    /**
     * Adds one Lucene document per JSON object in the given array.
     *
     * <p>Values that are not strings (numbers, booleans, nested objects/arrays) are indexed
     * using their string representation; {@code null} values are skipped. Array elements that
     * are not JSON objects are reported and skipped. A failure to add one document is reported
     * and does not stop the remaining documents from being added.
     *
     * @param jsonObjects array of JSON objects to index; {@code null} is treated as empty
     * @throws IllegalStateException if the index has not been opened successfully
     */
    public void addDocuments(JSONArray jsonObjects) {
        if (indexWriter == null) {
            throw new IllegalStateException("Index is not open; call openIndex() first.");
        }
        if (jsonObjects == null) {
            return;
        }
        for (Object element : jsonObjects) {
            if (!(element instanceof JSONObject)) {
                System.err.println("Skipping array element that is not a JSON object: " + element);
                continue;
            }
            Document doc = toDocument((JSONObject) element);
            try {
                System.out.println(doc);
                indexWriter.addDocument(doc);
            } catch (IOException ex) {
                System.err.println("Error adding documents to the index. " + ex.getMessage());
            }
        }
    }

    /**
     * Commits pending changes and closes the index.
     *
     * <p>Safe to call when the index was never opened, and safe to call more than once.
     * The writer is closed even when the commit fails so the write lock is not left behind.
     */
    public void finish() {
        IndexWriter writer = indexWriter;
        Directory dir = directory;
        indexWriter = null;
        directory = null;
        try {
            if (writer != null) {
                try {
                    writer.commit();
                } finally {
                    writer.close();
                }
            }
        } catch (IOException ex) {
            System.err.println("We had a problem closing the index: " + ex.getMessage());
        } finally {
            closeQuietly(dir);
        }
    }

    public static void main(String[] args) throws FileNotFoundException {
        LuceneIndexWriter liw = new LuceneIndexWriter(INDEX_PATH, JSON_FILE_PATH);
        liw.createIndex();
    }

    /** Builds the field configuration: indexed with positions and offsets, stored, tokenized, with term vectors. */
    private static FieldType buildBodyOptions() {
        FieldType options = new FieldType();
        options.setIndexed(true);
        options.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        options.setStored(true);
        options.setStoreTermVectors(true);
        options.setTokenized(true);
        // Shared across all documents, so guard against accidental later modification.
        options.freeze();
        return options;
    }

    /** Creates the analyzer from the stop-words file, closing the file once it has been read. */
    private static StandardAnalyzer loadAnalyzer() throws IOException {
        try (Reader readerStopWords = new InputStreamReader(
                new FileInputStream(STOPWORDS_FILE_PATH), StandardCharsets.UTF_8)) {
            return new StandardAnalyzer(Version.LUCENE_46, readerStopWords);
        }
    }

    /** Converts one JSON object into a Lucene document with one field per non-null entry. */
    private static Document toDocument(JSONObject object) {
        Document doc = new Document();
        for (Object key : object.keySet()) {
            Object value = object.get(key);
            if (value == null) {
                // A Lucene Field cannot hold a null value.
                continue;
            }
            doc.add(new Field(String.valueOf(key), String.valueOf(value), BODY_OPTIONS));
        }
        return doc;
    }

    /** Closes the resource if present, reporting (not propagating) any I/O failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ex) {
            System.err.println("We had a problem closing the index: " + ex.getMessage());
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment