Skip to content

Instantly share code, notes, and snippets.

@UmarIqbal
Created January 17, 2014 12:47
Show Gist options
  • Save UmarIqbal/8472847 to your computer and use it in GitHub Desktop.
Save UmarIqbal/8472847 to your computer and use it in GitHub Desktop.
package searchIndexer;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
/**
 * Incrementally indexes the PDF files found in a directory.
 *
 * <p>Three settings are read from {@code path.properties} (resolved against the
 * current working directory):
 * <ul>
 *   <li>{@code INDEX_DIR} - location passed to {@code DocIndexer}, where the index is stored;</li>
 *   <li>{@code FILES_DIR} - directory scanned for files whose name ends with {@code .pdf};</li>
 *   <li>{@code CONFIG_FILE} - text file listing, one name per line, the files that have
 *       already been indexed. Names found here are skipped on later runs.</li>
 * </ul>
 */
public class DocSearcher {
    /** Name of the properties file that holds the three path settings. */
    private static final String PROPERTIES_FILE = "path.properties";

    /** Location where the index will be stored; {@code null} if not configured. */
    private final String indexDir;
    /** Directory scanned for PDF files; {@code null} if not configured. */
    private final String filesDir;
    /** File recording the names of already indexed documents; {@code null} if not configured. */
    private final String configFile;

    /**
     * Loads the path settings from {@code path.properties}.
     *
     * <p>A missing or unreadable properties file is reported on standard error and does
     * not throw; the settings are then left {@code null} and {@link #createIndex()}
     * reports the problem and returns {@code false}.
     */
    public DocSearcher() {
        Properties prop = new Properties();
        FileInputStream in = null;
        try {
            in = new FileInputStream(PROPERTIES_FILE);
            prop.load(in);
        } catch (IOException e) {
            // Covers FileNotFoundException as well; kept best-effort like before.
            e.printStackTrace();
        } finally {
            // Properties.load leaves the stream open, so it has to be closed here.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        indexDir = prop.getProperty("INDEX_DIR");
        filesDir = prop.getProperty("FILES_DIR");
        configFile = prop.getProperty("CONFIG_FILE");
    }

    /**
     * Command-line entry point: builds a searcher from {@code path.properties} and runs
     * one indexing pass.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        DocSearcher docSearcher = new DocSearcher();
        docSearcher.createIndex();
    }

    /**
     * Indexes every PDF in the files directory that is not yet listed in the config file,
     * appending each file name to the config file once it has been indexed.
     *
     * <p>The run stops at the first file that fails to index.
     *
     * @return {@code true} if the pass completed; {@code false} if the configuration is
     *         incomplete, the files directory cannot be listed, or any step threw
     */
    public boolean createIndex() {
        try {
            System.out.println("-----------------------------INDEXING--------------------------");
            if (filesDir == null || configFile == null) {
                System.out.println("FILES_DIR and CONFIG_FILE must be set in " + PROPERTIES_FILE);
                return false;
            }
            File[] pdfFiles = new File(filesDir).listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.endsWith(".pdf");
                }
            });
            // listFiles returns null (not an empty array) when the path is not a
            // directory or an I/O error occurs.
            if (pdfFiles == null) {
                System.out.println("Cannot list files in " + filesDir);
                return false;
            }
            List<String> indexedDocuments = readIndexedDocuments();
            for (File pdfFile : pdfFiles) {
                if (!indexedDocuments.contains(pdfFile.getName())) {
                    indexFile(pdfFile);
                    recordIndexed(pdfFile.getName());
                }
            }
            System.out.println("-----------------------------COMPLETED--------------------------");
        } catch (Exception e) {
            // Print the exception itself: getMessage() alone is null for e.g. NPEs.
            System.out.println("Indexing failed: " + e);
            return false;
        }
        return true;
    }

    /**
     * Reads the names of the already indexed documents, one per line, from the config file.
     * A config file that does not exist yet is treated as an empty list, because the file
     * is only created when the first document name is appended.
     */
    private List<String> readIndexedDocuments() throws IOException {
        List<String> indexedDocuments = new ArrayList<String>();
        File config = new File(configFile);
        if (!config.exists()) {
            return indexedDocuments;
        }
        BufferedReader reader = new BufferedReader(new FileReader(config));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                indexedDocuments.add(line);
            }
        } finally {
            reader.close();
        }
        return indexedDocuments;
    }

    /**
     * Runs one PDF through {@code PDFReader} and {@code DocIndexer}, closing the indexer
     * even when indexing throws.
     */
    private void indexFile(File pdfFile) throws Exception {
        System.out.println(pdfFile.getName() + " running");
        PDFReader pdfReader = new PDFReader();
        DocStructure pdfIndexItem = pdfReader.index(pdfFile);
        DocIndexer indexer = new DocIndexer(indexDir);
        try {
            indexer.index(pdfIndexItem);
        } finally {
            indexer.close();
        }
        System.out.println(pdfFile.getName() + " Indexed");
    }

    /**
     * Appends a document name to the config file. Failures are reported but do not abort
     * the run; the only consequence is that the document is indexed again next time.
     */
    private void recordIndexed(String documentName) {
        PrintWriter out = null;
        try {
            out = new PrintWriter(new BufferedWriter(new FileWriter(configFile, true)));
            out.println(documentName);
            // PrintWriter never throws on write; checkError() flushes and reports failures.
            if (out.checkError()) {
                System.out.println("Could not record " + documentName + " in " + configFile);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (out != null) {
                out.close();
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment