Created
September 4, 2021 23:00
-
-
Save bmorrisondev/846f4fcd6746cdf5510e9450e55907c0 to your computer and use it in GitHub Desktop.
Ocr stuffz
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const axios = require('axios')
const fs = require('fs')
const { PDFImage } = require('pdf-image')
const { createWorker } = require('tesseract.js')
/**
 * Converts a PDF to an image with pdf-image and runs Tesseract OCR on it.
 *
 * The PDF is converted with `combinedImage: true`, and the conversion result
 * is handed to a Tesseract worker initialised for English (`'eng'`).
 *
 * @param {string} fileName - Path of the PDF file to process.
 * @returns {Promise<string>} The text recognised by Tesseract.
 * @throws Rejects with whatever error the PDF conversion or any worker step
 *   raises; the worker is terminated before the error propagates.
 */
async function ocrStuffz(fileName) {
  const pdfImage = new PDFImage(fileName, { combinedImage: true });
  // Presumably resolves to the path of the generated image — confirm against
  // the pdf-image version in use.
  const convertedImage = await pdfImage.convertFile();

  // NOTE(review): assumes the tesseract.js API where createWorker() returns
  // the worker synchronously and load/loadLanguage/initialize are separate
  // steps — confirm against the installed version.
  const worker = createWorker({
    logger: (m) => console.log(m),
  });

  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    const { data: { text } } = await worker.recognize(convertedImage);
    return text;
  } finally {
    // Always release the worker, even when setup or recognition fails,
    // so a failed OCR run does not leak it.
    await worker.terminate();
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment