Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
package strimillinn.core.ocr;
import java.io.File;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;
import org.apache.cayenne.query.SelectQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import strimillinn.core.StrimillinnCore;
import strimillinn.core.model.Receipt;
/**
 * Runs Tesseract OCR over every {@link Receipt} returned by an unqualified
 * {@link SelectQuery} and stores the recognized text back on the receipt.
 *
 * <p>Work is spread over a fixed thread pool sized to the number of available
 * processors. Each task works on its own object context, obtained from
 * {@code StrimillinnCore.newContext()}, and commits its own change.
 */
public class OCRExecutor {

    private static final Logger logger = LoggerFactory.getLogger( OCRExecutor.class );

    /**
     * Queues one OCR task per receipt and blocks until all queued tasks have
     * finished, then logs "Done!".
     *
     * <p>If the calling thread is interrupted while waiting, the remaining tasks
     * are cancelled via {@code shutdownNow()}, the interrupt flag is restored and
     * the method returns without logging "Done!".
     */
    public void run() {
        int numberOfThreads = Runtime.getRuntime().availableProcessors();
        ExecutorService es = Executors.newFixedThreadPool( numberOfThreads );

        SelectQuery<Receipt> query = new SelectQuery<>( Receipt.class );
        query.addPrefetch( Receipt.DOCUMENT.joint() );

        LongAdder count = new LongAdder();

        try {
            StrimillinnCore.newContext().iterate( query, receipt -> es.execute( () -> processReceipt( receipt, count ) ) );
        }
        finally {
            // Always stop accepting work, even if iteration fails, so the
            // non-daemon pool threads do not outlive the batch.
            es.shutdown();
        }

        try {
            // shutdown() does not wait for queued tasks; block here so that
            // "Done!" is only logged once every receipt has been handled.
            es.awaitTermination( Long.MAX_VALUE, TimeUnit.NANOSECONDS );
        }
        catch( InterruptedException e ) {
            es.shutdownNow();
            Thread.currentThread().interrupt();
            logger.warn( "Interrupted while waiting for OCR tasks to finish", e );
            return;
        }

        logger.info( "Done!" );
    }

    /**
     * OCRs a single receipt's document and commits the resulting text.
     *
     * @param receipt the receipt as delivered by the iterating context; it is
     *                re-resolved into a fresh context before being modified
     * @param count   shared counter of started tasks, used for progress logging
     */
    private void processReceipt( Receipt receipt, LongAdder count ) {
        try {
            count.increment();

            // Use a context private to this task rather than the one that is
            // driving the iteration on the calling thread.
            Receipt localReceipt = StrimillinnCore.newContext().localObject( receipt );
            logger.info( "Processing:{} : {}", localReceipt.creationDate(), count );

            File file = localReceipt.document().file();
            String string = null;

            try {
                string = new OCREngineTesseract().parseFile( file );
            }
            catch( OCRException e ) {
                logger.error( "OCR failed for file {}", file, e );
            }

            // NOTE(review): as before, a failed OCR run stores null as the text —
            // confirm that this is what setText is expected to receive on failure.
            localReceipt.setText( string, "tesseract" );
            localReceipt.getObjectContext().commitChanges();
        }
        catch( RuntimeException e ) {
            // Report through the logger instead of relying on the worker
            // thread's default uncaught-exception handler.
            logger.error( "Failed to process receipt", e );
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment