Skip to content

Instantly share code, notes, and snippets.

@javilobo8 javilobo8/ocr.js
Last active Apr 29, 2019

Embed
What would you like to do?
const Tesseract = require('tesseract.js');
const sharp = require('sharp');
const cv = require('opencv4nodejs');
const fs = require('fs');
// ---- Tunables for the OpenCV stage of the pipeline ----

// Factor by which both image dimensions are multiplied before the
// morphological step.
const SCALE = 2;

// Number of times the morphological operation is applied.
const ITERATIONS = 2;

// Used for both coordinates of ANCHOR below.
const EROSION_SIZE = 1;

// NOTE(review): this is a morphology-type constant, yet the pipeline passes it
// as the trailing argument of morphologyExAsync (after ITERATIONS). Confirm
// against the opencv4nodejs signature that this is the intended slot.
const EROSION_MODE = cv.MORPH_ERODE;

// Anchor point handed to the morphological operation.
const ANCHOR = new cv.Point2(
  EROSION_SIZE,
  EROSION_SIZE,
);

// Entries of the `captchas` folder, resolved relative to the current working
// directory. Read synchronously at load time; throws if the folder is missing.
const dirs = fs.readdirSync('captchas');
/**
 * Runs OCR on `output_edited_opencv.png` and prints the recognized text to
 * stdout with every whitespace character stripped out.
 *
 * The recognizer is configured with a digits-only character whitelist and a
 * blacklist containing the space character; the page-segmentation and engine
 * modes are passed through as the numeric values below.
 *
 * @returns {*} The value produced by chaining `.then()` onto the result of
 *   `Tesseract.recognize(...)`.
 */
function runTesseract() {
  const recognizeOptions = {
    tessedit_char_whitelist: '0123456789',
    tessedit_char_blacklist: ' ',
    tessedit_pageseg_mode: 5,
    tessedit_ocr_engine_mode: 1,
  };

  // Coerce the text to a string, drop all whitespace, and log what is left.
  const printCompactText = (result) => {
    const compact = String(result.text).replace(/[\s\n\r]/ig, '');
    return console.log(compact);
  };

  const job = Tesseract.recognize('output_edited_opencv.png', recognizeOptions);
  return job.then(printCompactText);
}
// Pre-process one captcha image and OCR it.
//
//   1. sharp:     negate, remove the alpha channel, threshold at 50, and write
//                 the result to output_edited.png.
//   2. OpenCV:    read that file back, upscale both dimensions by SCALE using
//                 cv.INTER_NEAREST, apply cv.MORPH_OPEN via morphologyExAsync,
//                 and write the result to output_edited_opencv.png.
//   3. Tesseract: runTesseract() reads output_edited_opencv.png and prints
//                 the recognized text.
//
// The input is the entry at index 6 of the `captchas` directory listing.
// NOTE(review): if the folder has fewer than seven entries the path becomes
// `./captchas/undefined`; the failure is then reported by the .catch below.
sharp(`./captchas/${dirs[6]}`)
  .negate()
  .removeAlpha()
  .threshold(50, { grayscale: true })
  .toFile('output_edited.png')
  .then(() => cv.imreadAsync('output_edited.png'))
  .then((img) => img.resizeAsync(
    SCALE * img.sizes[0],
    SCALE * img.sizes[1],
    // Presumably the fx/fy scale factors of the opencv4nodejs resize
    // signature (TODO confirm); tied to SCALE instead of a duplicated literal.
    SCALE, SCALE,
    cv.INTER_NEAREST,
  ))
  // An empty Mat is passed as the kernel.
  // NOTE(review): EROSION_MODE occupies the argument after ITERATIONS, which
  // looks like the border-type slot rather than a morphology type. Verify
  // against the opencv4nodejs morphologyEx signature.
  .then((img) => img.morphologyExAsync(
    new cv.Mat(),
    cv.MORPH_OPEN,
    ANCHOR,
    ITERATIONS,
    EROSION_MODE,
  ))
  .then((img) => cv.imwriteAsync('output_edited_opencv.png', img))
  .then(runTesseract)
  // Without a terminal handler, any failure in the stages above would surface
  // only as an unhandled promise rejection. Report it and signal failure
  // through the exit code instead.
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.