Skip to content

Instantly share code, notes, and snippets.

@barakplasma
Last active March 8, 2021 17:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save barakplasma/40b8a8aea8937cc9c1e7de78a30e5f4a to your computer and use it in GitHub Desktop.
Save barakplasma/40b8a8aea8937cc9c1e7de78a30e5f4a to your computer and use it in GitHub Desktop.
Tesseract OCR from Webcam
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Tesseract.js Video Streaming Recognition</title>
<link rel="stylesheet" href="style.css">
<!-- Tesseract.js, pinned to v2.0.0-beta.1 and loaded from the unpkg CDN. The inline script below reads the `Tesseract` global. -->
<script src='https://unpkg.com/tesseract.js@v2.0.0-beta.1/dist/tesseract.min.js'></script>
</head>
<body>
<!-- Container for the video, the spacer and the message log. -->
<div id="root">
<!-- Video element the inline script looks up by id; it assigns the camera stream to it and later enables its controls. -->
<video id="poem-video" width="640" height="360" crossorigin="anonymous">
</video>
<!-- Spacer element between the video and the log. -->
<div id="sep"></div>
<!-- Message log: the inline script appends status lines and recognized text here. -->
<div id="messages">
</div>
</div>
<script>
// Tesseract.js factory functions, taken from the `Tesseract` global.
const { createWorker, createScheduler } = Tesseract;
// Single shared scheduler; OCR jobs are submitted to it with scheduler.addJob().
const scheduler = createScheduler();
const video = document.getElementById('poem-video');
// Ask for the webcam and show its stream in the <video> element.
// `navigator.mediaDevices` is undefined in insecure (non-HTTPS) contexts, so it
// has to be checked before `getUserMedia` is read from it; otherwise the whole
// script aborts with a TypeError.
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
  navigator.mediaDevices.getUserMedia({ video: true })
    .then(function (stream) {
      video.srcObject = stream;
    })
    .catch(function (err) {
      // Keep the actual reason (permission denied, no camera, ...) visible.
      console.error('Could not access the webcam:', err);
    });
} else {
  console.error('navigator.mediaDevices.getUserMedia is not available in this browser or context.');
}
const messages = document.getElementById('messages');
// Handle of the setInterval timer that drives doOCR (initially null).
let timerId = null;
/**
 * Append one line to the #messages log and scroll the log to the bottom.
 *
 * The text is inserted with `textContent` rather than `innerHTML`, so the
 * message (which may be raw OCR output) is never parsed as markup, and the
 * existing log is not re-parsed on every call.
 *
 * @param {*} m - Message to display; converted to a string.
 * @param {boolean} [bold] - When truthy, the line gets the `bold` CSS class.
 */
const addMessage = (m, bold) => {
  const line = document.createElement('p');
  line.textContent = `${m}`;
  if (bold) {
    line.className = 'bold';
  }
  messages.appendChild(line);
  // Keep the newest line in view.
  messages.scrollTop = messages.scrollHeight;
};
/**
 * Capture the current video frame, run OCR on it through the scheduler, and
 * log the timing followed by each recognized line.
 *
 * This function is used as a setInterval callback, so nothing observes the
 * promise it returns. Every failure is therefore caught here and reported in
 * the log instead of surfacing as an unhandled rejection.
 *
 * @returns {Promise<void>} Resolves once the frame has been processed or the
 *   failure has been reported; it does not reject for OCR or drawing errors.
 */
const doOCR = async () => {
  // Two-digit minutes/seconds, so 12:05 is not printed as 12:5.
  const pad = (n) => String(n).padStart(2, '0');
  const stamp = (d) => `${pad(d.getMinutes())}:${pad(d.getSeconds())}`;
  try {
    // Snapshot the frame into an off-screen canvas at the video's display size.
    const c = document.createElement('canvas');
    c.width = 640;
    c.height = 360;
    c.getContext('2d').drawImage(video, 0, 0, 640, 360);
    const start = new Date();
    const { data: { text } } = await scheduler.addJob('recognize', c);
    const end = new Date();
    addMessage(`[${stamp(start)} - ${stamp(end)}], ${(end - start) / 1000} s`);
    text.split('\n').forEach((line) => {
      addMessage(line);
    });
  } catch (err) {
    console.error('OCR failed:', err);
    addMessage(`OCR failed: ${err && err.message ? err.message : err}`, true);
  }
};
// Start-up sequence: create the OCR workers, register them with the scheduler,
// then wire OCR polling to the video's play/pause events and enable the
// player controls.
(async () => {
  // Number of Tesseract workers registered with the scheduler.
  const workerCount = 4;
  try {
    addMessage('Initializing Tesseract.js');
    for (let i = 0; i < workerCount; i++) {
      const worker = createWorker();
      await worker.load();
      await worker.loadLanguage('eng');
      await worker.initialize('eng');
      scheduler.addWorker(worker);
    }
    addMessage('Initialized Tesseract.js');
  } catch (err) {
    // Without this, a failed worker start would be an unhandled rejection and
    // the log would stay on "Initializing Tesseract.js" forever.
    console.error('Tesseract.js initialization failed:', err);
    addMessage(`Failed to initialize Tesseract.js: ${err && err.message ? err.message : err}`, true);
    return;
  }
  video.addEventListener('play', () => {
    // Never leave an earlier interval running next to the new one.
    clearInterval(timerId);
    timerId = setInterval(doOCR, 1000);
  });
  video.addEventListener('pause', () => {
    clearInterval(timerId);
    timerId = null;
  });
  addMessage('Now you can play the video. :)');
  // Controls are enabled only after the workers are ready.
  video.controls = true;
})();
</script>
</body>
</html>
{
"name": "tesseract.js-video",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"start": "live-server"
},
"author": "",
"license": "ISC",
"dependencies": {
"live-server": "latest"
}
}
/* Let the page fill the viewport so #root's 100% height has something to resolve against. */
html, body {
  width: 100%;
  height: 100%;
  margin: 0;
}
/* Emphasis for log lines that carry the `bold` class. */
.bold {
  font-weight: bold;
}
/* Full-size flex row; its children are centred on both axes. */
#root {
  width: 100%;
  height: 100%;
  /* Keep the horizontal padding inside the 100% width. With the default
     content-box sizing the element would be 32px wider than the viewport
     and the page would scroll sideways. */
  box-sizing: border-box;
  display: flex;
  flex-direction: row;
  justify-content: center;
  align-items: center;
  padding: 0px 16px;
}
/* Fixed-width spacer between the video and the log. */
#sep {
  width: 16px;
}
/* Log panel, sized like the video; scrolls when its content overflows. */
#messages {
  width: 640px;
  height: 360px;
  overflow: auto;
}

Tesseract.js-based OCR from a webcam

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment