Last active
February 5, 2023 23:48
-
-
Save AlexandroMtzG/65e6ff82ed3b961c71391138f9648350 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Tesseract from "tesseract.js";
import PdfService from "./PdfService";
/**
 * Languages offered for OCR.
 *
 * `name` is the human-readable label and `value` is the language code string
 * (presumably the code handed to `scan` as its `lang` argument — confirm
 * against callers).
 */
export const OcrTesseractLanguages = [
  { name: "English", value: "eng" },
  { name: "Spanish", value: "spa" },
];
/**
 * Extracts text from a file via OCR.
 *
 * When `file` ends with ".pdf" or starts with "data:application/pdf", it is
 * first converted to images with `PdfService.convertToImages`; each image's
 * `base64` payload is then recognized in order and the results are
 * concatenated with no separator. Any other input is passed straight to
 * `scanImage`.
 *
 * @param file - The input to scan; the checks below treat it as a file
 *   name/path or a data URL.
 * @param lang - Language code forwarded unchanged to `scanImage`.
 * @returns The recognized text (concatenation of all pages for a PDF).
 * @throws Rethrows any error raised during conversion or recognition, after
 *   logging it.
 */
async function scan(file: string, lang: string): Promise<string> {
  try {
    const isPdf = file.endsWith(".pdf") || file.startsWith("data:application/pdf");
    if (!isPdf) {
      // OCR directly on the image input.
      return await scanImage(file, lang);
    }

    const images = await PdfService.convertToImages({ file });
    // eslint-disable-next-line no-console
    console.log({ images });

    // Recognize images one after another so the output keeps their order.
    let text = "";
    for (const image of images) {
      text += await scanImage(image.base64, lang);
    }
    return text;
  } catch (e) {
    // eslint-disable-next-line no-console
    console.log(e);
    throw e;
  }
}
/**
 * Runs Tesseract OCR on a single image and returns the recognized text.
 *
 * Progress messages from Tesseract, the final result and any failure are
 * logged to the console with an "[OCR]" prefix.
 *
 * @param file - The image input handed unchanged to `Tesseract.recognize`.
 * @param lang - Language code handed unchanged to `Tesseract.recognize`.
 * @returns The `data.text` field of the recognition result.
 * @throws Rethrows whatever `Tesseract.recognize` throws or rejects with,
 *   after logging it.
 */
async function scanImage(file: string, lang: string): Promise<string> {
  // eslint-disable-next-line no-console
  console.log("[OCR] Scanning image: ", file, lang);
  try {
    // Awaiting inside try/catch (rather than inside a `new Promise` executor)
    // guarantees that synchronous throws from `recognize` also reject the
    // returned promise instead of leaving it pending forever.
    const {
      data: { text },
    } = await Tesseract.recognize(file, lang, {
      logger: (m) => {
        // eslint-disable-next-line no-console
        console.log("[OCR] Logger: ", JSON.stringify(m));
      },
    });
    // eslint-disable-next-line no-console
    console.log("[OCR] Result: ", text);
    return text;
  } catch (e) {
    // eslint-disable-next-line no-console
    console.log("[OCR] Error: ", e);
    throw e;
  }
}
/** Default export of this module: exposes `scan` as its only member. */
export default {
  scan,
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment