Created
January 17, 2014 12:47
-
-
Save UmarIqbal/8472841 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package searchIndexer; | |
import java.io.BufferedReader; | |
import java.io.BufferedWriter; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileNotFoundException; | |
import java.io.FileReader; | |
import java.io.FileWriter; | |
import java.io.FilenameFilter; | |
import java.io.IOException; | |
import java.io.PrintWriter; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.List; | |
import java.util.Properties; | |
public class DocSearcher { | |
// location where the index will be stored. | |
private String INDEX_DIR; | |
private String FILES_DIR; | |
private String CONFIG_FILE; | |
public DocSearcher() { | |
Properties prop = new Properties(); | |
try { | |
prop.load(new FileInputStream("path.properties")); | |
} catch (FileNotFoundException e) { | |
e.printStackTrace(); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
INDEX_DIR = prop.getProperty("INDEX_DIR"); | |
FILES_DIR = prop.getProperty("FILES_DIR"); | |
CONFIG_FILE = prop.getProperty("CONFIG_FILE"); | |
// INDEX_DIR = "C:\\Documents and Settings\\uiqbal\\Desktop\\IndexFolder"; | |
// FILES_DIR = "C:\\Documents and Settings\\uiqbal\\Desktop\\IndexFiles\\"; | |
// CONFIG_FILE = "C:\\Documents and Settings\\uiqbal\\Desktop\\IndexFolder\\Indexconfig.txt"; | |
} | |
public static void main(String[] args) { | |
DocSearcher docSearcher=new DocSearcher(); | |
docSearcher.createIndex(); | |
} | |
public boolean createIndex() { | |
try { | |
System.out.println("-----------------------------INDEXING--------------------------"); | |
List<File> pdfFiles = new ArrayList<File>(Arrays.asList(new File(FILES_DIR).listFiles(new FilenameFilter() { | |
@Override | |
public boolean accept(File dir, String name) { | |
// TODO Auto-generated method stub | |
return name.endsWith(".pdf"); | |
} | |
} ))); | |
List<String> indexedDocuments=new ArrayList<String>(); | |
BufferedReader reader = new BufferedReader(new FileReader(CONFIG_FILE)); | |
String line = null; | |
while ((line = reader.readLine()) != null) { | |
indexedDocuments.add(line); | |
} | |
reader.close(); | |
for (File pdfFile : pdfFiles) { | |
if (!indexedDocuments.contains(pdfFile.getName())) { | |
System.out.println(pdfFile.getName().toString()+" running"); | |
PDFReader pdfReader = new PDFReader(); | |
DocStructure pdfIndexItem = pdfReader.index(pdfFile); | |
DocIndexer indexer=new DocIndexer(INDEX_DIR); | |
indexer.index(pdfIndexItem); | |
indexer.close(); | |
System.out.println(pdfFile.getName().toString()+" Indexed"); | |
try { | |
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(CONFIG_FILE, true))); | |
out.println(pdfFile.getName().toString()); | |
out.close(); | |
} catch (IOException e) { | |
e.printStackTrace(); | |
} | |
} | |
} | |
System.out.println("-----------------------------COMPLETED--------------------------"); | |
} catch (Exception e) { | |
System.out.println(e.getMessage()); | |
return false; | |
} | |
return true; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment