Skip to content

Instantly share code, notes, and snippets.

@javilobo8
Last active April 29, 2019 18:54
Show Gist options
  • Save javilobo8/951de419a72c1d2c829851448dd909b1 to your computer and use it in GitHub Desktop.
Save javilobo8/951de419a72c1d2c829851448dd909b1 to your computer and use it in GitHub Desktop.
const Tesseract = require('tesseract.js');
const sharp = require('sharp');
const cv = require('opencv4nodejs');
const fs = require('fs');
// Value used for both coordinates of ANCHOR below.
const EROSION_SIZE = 1;
// OpenCV's "erode" morphology-type constant.
const EROSION_MODE = cv.MORPH_ERODE;
// Multiplier applied to the image dimensions where the image is resized.
const SCALE = 2;
// Iteration count handed to the OpenCV morphology call.
const ITERATIONS = 2;
// Anchor point handed to the OpenCV morphology call.
// NOTE(review): presumably the centre of a 3x3 structuring element - confirm.
const ANCHOR = new cv.Point2(EROSION_SIZE, EROSION_SIZE);
// Entry names found in the `captchas` directory, read synchronously at load
// time. Despite the name, these are used as image file names further down.
const dirs = fs.readdirSync('captchas');
/**
 * Runs OCR on 'output_edited_opencv.png' and prints the recognised text.
 *
 * Recognition is restricted to the digits 0-9 via the character whitelist.
 * All whitespace is stripped from the recognised text before it is logged.
 *
 * @returns {*} Whatever `Tesseract.recognize(...).then(...)` returns, so the
 *   caller can wait for the OCR run to finish.
 */
function runTesseract() {
  const recognitionOptions = {
    tessedit_char_whitelist: '0123456789',
    tessedit_char_blacklist: ' ',
    tessedit_pageseg_mode: 5,
    tessedit_ocr_engine_mode: 1,
  };

  // Strip every whitespace character, then print what is left.
  const printRecognisedText = (result) => {
    const cleaned = String(result.text).replace(/[\s\n\r]/ig, '');
    console.log(cleaned);
  };

  return Tesseract
    .recognize('output_edited_opencv.png', recognitionOptions)
    .then(printRecognisedText);
}
// Position (within the `captchas` directory listing) of the image to process.
const CAPTCHA_INDEX = 6;

/**
 * Pre-processes one captcha image and runs OCR on the result.
 *
 * Steps:
 *   1. sharp: negate, drop the alpha channel, threshold at 50, and write
 *      'output_edited.png'.
 *   2. OpenCV: read that file back, enlarge it by SCALE in both dimensions
 *      with nearest-neighbour interpolation, apply a morphological "open",
 *      and write 'output_edited_opencv.png'.
 *   3. Hand over to runTesseract(), which reads the file written in step 2.
 *
 * @returns {Promise<void>} Resolves once OCR has finished.
 * @throws {Error} If the `captchas` directory has no entry at CAPTCHA_INDEX,
 *   or if any processing step fails.
 */
async function main() {
  const captchaName = dirs[CAPTCHA_INDEX];
  if (captchaName === undefined) {
    // Fail with a clear message instead of trying to open './captchas/undefined'.
    throw new Error(
      `No captcha at index ${CAPTCHA_INDEX}: 'captchas' contains ${dirs.length} entries`,
    );
  }

  await sharp(`./captchas/${captchaName}`)
    .negate()
    .removeAlpha()
    .threshold(50, { grayscale: true })
    .toFile('output_edited.png');

  const img = await cv.imreadAsync('output_edited.png');

  // NOTE(review): the fx/fy arguments (2, 2) are kept from the original call;
  // they are presumably ignored because explicit target sizes are given - confirm.
  const resized = await img.resizeAsync(
    SCALE * img.sizes[0],
    SCALE * img.sizes[1],
    2, 2,
    cv.INTER_NEAREST,
  );

  // The last argument of morphologyExAsync is a border type, not a morphology
  // type. The original passed cv.MORPH_ERODE there, which only worked because
  // it has the same numeric value as cv.BORDER_CONSTANT.
  // NOTE(review): an empty Mat kernel presumably selects OpenCV's default
  // structuring element - confirm.
  const opened = await resized.morphologyExAsync(
    new cv.Mat(),
    cv.MORPH_OPEN,
    ANCHOR,
    ITERATIONS,
    cv.BORDER_CONSTANT,
  );

  await cv.imwriteAsync('output_edited_opencv.png', opened);
  await runTesseract();
}

main().catch((err) => {
  // Report the failure and signal it through the exit code, rather than
  // leaving an unhandled promise rejection.
  console.error('Captcha processing failed:', err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment