Skip to content

Instantly share code, notes, and snippets.

Created January 28, 2016 03:57
Show Gist options
  • Save jcjohnson/ad61c6169274694e9b03 to your computer and use it in GitHub Desktop.
Save jcjohnson/ad61c6169274694e9b03 to your computer and use it in GitHub Desktop.
$(function() {
// ---- Shared state and display settings for the webcam demo ----

// URL of the captioning server; assigned from the `server_url` query
// parameter when the Start button is clicked.
var SERVER_URL = null;
// Becomes true once the <video> element reports 'canplay'.
var video_active = false;
// True while the capture -> annotate -> draw loop should keep going.
var demo_running = false;
// Upper bound on the number of boxes drawn per frame (Fewer / More buttons).
var NUM_TO_SHOW = 20;
// NOTE(review): IMAGE_DISPLAY_WIDTH and BOX_LINE_WIDTH are read by draw_image
// and by the Smaller / Bigger buttons, but no declaration for them was present
// in this script. The values below are placeholders so the code does not throw
// a ReferenceError -- confirm the intended defaults.
var IMAGE_DISPLAY_WIDTH = 800;
var BOX_LINE_WIDTH = 6;
// The next three values are not referenced by the code visible in this script.
var FONT_SIZE = 48;
var TEXT_BOX_PAD = 5;
var PAD = 10;
// A nice set of colors; boxes cycle through it by index.
var WAD_COLORS = [
  "rgb(173, 35, 35)", // Red
  "rgb(42, 75, 215)", // Blue
  "rgb(87, 87, 87)", // Dark Gray
  "rgb(29, 105, 20)", // Green
  "rgb(129, 74, 25)", // Brown
  "rgb(129, 38, 192)", // Purple
  "rgb(160, 160, 160)", // Lt Gray
  "rgb(129, 197, 122)", // Lt green
  "rgb(157, 175, 255)", // Lt blue
  "rgb(41, 208, 208)", // Cyan
  "rgb(255, 146, 51)", // Orange
  "rgb(255, 238, 51)", // Yellow
  "rgb(233, 222, 187)", // Tan
  "rgb(255, 205, 243)", // Pink
  // "rgb(255, 255, 255)", // White
  "rgb(0, 0, 0)" // Black
];
// Overall, the client-side program flow works like this:
// First, we request a media stream object to access the webcam; once we have
// it we bind it to a hidden <video> element and play the video. Now we can
// talk to the server. To grab a frame, we write the video to a hidden canvas
// and get json-encoded pixel data from the canvas as a DataURL. We pass this
// to the server, which responds with annotations; we draw the image and
// annotations to a second (visible) canvas, and repeat. Note that the client
// does not include any sleeping; it should show frames as fast as the server
// can process them. Also, to make the whole thing dummy-proof, we read the
// server URL from a URL parameter. This should theoretically run on Android
// but I haven't tested it; unfortunately it won't work on iOS.
// Look up one parameter in the page's query string (location.search).
//
//   name    - parameter name; matched literally.
//   returns - the URL-decoded value ('+' is treated as a space), or '' when
//             the parameter is not present.
function get_url_param(name) {
  // Escape every regex metacharacter (not just the first '[' / ']') so the
  // name can never change the meaning of the pattern built below.
  var escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  var regex = new RegExp("[\\?&]" + escaped + "=([^&#]*)");
  var results = regex.exec(location.search);
  if (results === null) return '';
  return decodeURIComponent(results[1].replace(/\+/g, " "));
}
// Draw image and annotations to the main canvas (#canvas).
//
//   image_url - URL of the frame to draw (captureImage passes a data URL).
//   data      - annotation object; this function reads data.width,
//               data.height, data.boxes (entries indexed as [x, y, w, h]) and
//               data.captions (one string per box).
//
// Drawing happens asynchronously, once the image has loaded.
function draw_image(image_url, data) {
  // Fix the geometry now: IMAGE_DISPLAY_WIDTH can change (Smaller / Bigger
  // buttons) before the image finishes loading.
  var pos = {
    x: 0,
    y: 0,
    w: IMAGE_DISPLAY_WIDTH,
    h: IMAGE_DISPLAY_WIDTH / data.width * data.height
  };
  var canvas = document.getElementById('canvas');
  var ctx = canvas.getContext('2d');
  var img = new Image();
  img.onload = function() {
    // First paint a white rectangle over everything. fillRect is used rather
    // than rect so no path segments pile up from frame to frame.
    ctx.fillStyle = 'rgb(255, 255, 255)';
    ctx.fillRect(0, 0, canvas.width, canvas.height);
    ctx.drawImage(img, pos.x, pos.y, pos.w, pos.h);

    // Factors converting image-space coordinates to canvas coordinates.
    var scale_x = pos.w / img.width;
    var scale_y = pos.h / img.height;
    for (var i = 0; i < NUM_TO_SHOW && i < data.boxes.length; i++) {
      var box = data.boxes[i];
      // NOTE(review): an earlier comment said the input boxes are 1-indexed,
      // but no -1 offset is applied here -- confirm against the server.
      var x = box[0] * scale_x + pos.x;
      var y = box[1] * scale_y + pos.y;
      var w = box[2] * scale_x;
      var h = box[3] * scale_y;
      var color = WAD_COLORS[i % WAD_COLORS.length];

      // Draw the box.
      ctx.lineWidth = BOX_LINE_WIDTH;
      ctx.strokeStyle = color;
      ctx.strokeRect(x, y, w, h);

      // Now draw the text on a half-transparent label in the box color.
      ctx.font = '18px sans-serif';
      ctx.textBaseline = 'top';
      var text_width = ctx.measureText(data.captions[i]).width;
      ctx.save();
      ctx.globalAlpha = 0.5;
      ctx.fillStyle = color;
      ctx.fillRect(x, y, text_width, 20);
      // restore() brings back full opacity and the white fill set above, so
      // the transparency never leaks into the text or into the next frame.
      ctx.restore();
      ctx.fillText(data.captions[i], x, y);
    }
  };
  img.src = image_url;
}
// Grab an image from the webcam, send it to the server, and draw the results.
// While demo_running is true, the response handler calls captureImage again,
// so frames are processed back to back with no explicit delay.
function captureImage() {
  // Make sure that the video is active.
  if (!video_active) return;

  // By this point the webcam is streaming to the video object.
  // To get a frame, we draw the video to a canvas and then pull a data URL
  // from the canvas that has encoded pixel data.
  var video = document.getElementById('video');
  var img_canvas = document.getElementById('img-canvas');
  img_canvas.width = video.videoWidth;
  img_canvas.height = video.videoHeight;
  var ctx = img_canvas.getContext('2d');
  ctx.drawImage(video, 0, 0, video.videoWidth, video.videoHeight);

  // TODO: jpeg might not be supported on all browsers;
  // detect this somehow and fall back to png
  var img_url = img_canvas.toDataURL('image/jpeg');

  // Send the frame to the server.
  var request = new XMLHttpRequest();
  request.open('POST', SERVER_URL, true);
  request.setRequestHeader('Content-Type', 'application/upload');
  request.onload = function(e) {
    // Once we have the response, render it and loop.
    var annotations = JSON.parse(request.responseText);
    draw_image(img_url, annotations);
    if (demo_running) captureImage();
  };
  // Without this, a network failure would end the loop with no trace.
  request.onerror = function(e) {
    console.log('ERROR: request to server failed', e);
  };
  request.send('img=' + img_url);
}
// getUserMedia success callback: attach the webcam stream to the #video
// element, start playback, and enable the control buttons once the video can
// play.
//
//   stream - the media stream handed over by getUserMedia.
function success(stream) {
  var video = document.getElementById('video');
  video.addEventListener('canplay', function() {
    // Once the video is ready, set a flag and enable all buttons.
    video_active = true;
    var btn_ids = [
      '#btn-less', '#btn-more',
      '#btn-start', '#btn-stop',
      '#btn-smaller', '#btn-bigger'
    ];
    for (var i = 0; i < btn_ids.length; i++) {
      $(btn_ids[i]).attr('disabled', false);
    }
  });

  // Bind the webcam stream to the video object in the DOM. srcObject is the
  // supported way to attach a MediaStream; createObjectURL(stream) is kept
  // only as a fallback for browsers that lack srcObject.
  if ('srcObject' in video) {
    video.srcObject = stream;
  } else {
    var vendorURL = window.URL || window.webkitURL;
    video.src = vendorURL.createObjectURL(stream);
  }
  video.play();
}
// getUserMedia failure callback: report the error on the console.
//
//   error - whatever error value the browser passed to the callback.
function errorCallback(error) {
  console.log('ERROR: ', error);
}
// Ask the browser for a video-only webcam stream; `success` receives the
// stream and `errorCallback` receives any failure.
var constraints = {
  audio: false,
  video: true
};
if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
  // Standard promise-based API.
  navigator.mediaDevices.getUserMedia(constraints).then(success, errorCallback);
} else {
  // Older callback-based API, possibly behind a vendor prefix.
  navigator.getUserMedia = navigator.getUserMedia ||
      navigator.webkitGetUserMedia || navigator.mozGetUserMedia;
  if (navigator.getUserMedia) {
    navigator.getUserMedia(constraints, success, errorCallback);
  } else {
    // No webcam API at all: report it instead of throwing a TypeError.
    errorCallback(new Error('getUserMedia is not supported by this browser'));
  }
}
// Add logic to buttons.
// NOTE(review): the `$(function() {` wrapper opened at the top of this script
// has no closing `});` in the visible text; it belongs after these handlers.

// Start: read the server URL from the `server_url` query parameter and kick
// off the capture loop.
$('#btn-start').click(function() {
  // A second click while running would start a second, parallel loop.
  if (demo_running) return;
  SERVER_URL = get_url_param('server_url');
  demo_running = true;
  captureImage();
});
// Stop: the loop ends after the response that is currently in flight.
$('#btn-stop').click(function() { demo_running = false; });
// Fewer / More: change how many boxes are drawn; never go below zero.
$('#btn-less').click(function() { NUM_TO_SHOW = Math.max(0, NUM_TO_SHOW - 1); });
$('#btn-more').click(function() { NUM_TO_SHOW++; });
// Smaller / Bigger: resize the displayed image in 100px steps, keeping the
// width positive so the image never collapses to nothing.
$('#btn-smaller').click(function() {
  IMAGE_DISPLAY_WIDTH = Math.max(100, IMAGE_DISPLAY_WIDTH - 100);
});
$('#btn-bigger').click(function() { IMAGE_DISPLAY_WIDTH += 100; });
<title>View Captioning results</title>
<script src=""></script>
<script src=""></script>
<link href="" rel="stylesheet">
/* Fixed width for the #paragraph-div row. */
#paragraph-div {
  width: 800px;
}

/* Caption spans: large text with a little space to the right. */
span.caption {
  /* Disabled declarations, kept for reference ("//" is not a CSS comment): */
  /* padding: 14px; */
  /* border-style: solid; */
  /* border-width: 4px; */
  padding-right: 10px;
  font-size: 28px;
}
<div class='container-fluid'>
<h1>Captioning Output</h1>
<div class='col-xs-12' id='results-div'>
<div class='row text-center'>
<button class='btn btn-lg btn-primary' id='btn-less'>Fewer</button>
<button class='btn btn-lg btn-primary' id='btn-more'>More</button>
<canvas id='canvas' height='800px' width='1400px'>
<div class='row' id='paragraph-div'>
$(function() {
// ---- Settings for the results viewer ----

// Annotation file and frame image; refresh() appends ?id=<n> to both.
var JSON_URL = 'curframe.json';
// var image_url = '/img.jpg';
var IMAGE_URL = 'curframe.jpg';
// The next two values are not referenced by the code visible in this script.
var CANVAS_WIDTH = 1400;
var CANVAS_HEIGHT = 400;
// NOTE(review): BOX_LINE_WIDTH is read by draw_image but no declaration for it
// was present in this script; 6 is a placeholder so the code does not throw a
// ReferenceError -- confirm the intended value.
var BOX_LINE_WIDTH = 6;
var FONT_SIZE = 48;
// Gap in pixels between caption rows, and text inset, in draw_text.
var TEXT_BOX_PAD = 5;
var PAD = 10;
var BOX_COLORS = [
  "rgb(235, 81, 71)",
  "rgb(143, 235, 71)",
  "rgb(71, 225, 235)",
  "rgb(163, 71, 235)"
];
var BG_COLORS = [
  "rgb(224, 186, 184)",
  "rgb(202, 224, 184)",
  "rgb(184, 222, 224)",
  "rgb(206, 184, 224)"
];
// Palette the boxes cycle through by index.
var WAD_COLORS = [
  "rgb(173, 35, 35)", // Red
  "rgb(42, 75, 215)", // Blue
  "rgb(87, 87, 87)", // Dark Gray
  "rgb(29, 105, 20)", // Green
  "rgb(129, 74, 25)", // Brown
  "rgb(129, 38, 192)", // Purple
  "rgb(160, 160, 160)", // Lt Gray
  "rgb(129, 197, 122)", // Lt green
  "rgb(157, 175, 255)", // Lt blue
  "rgb(41, 208, 208)", // Cyan
  "rgb(255, 146, 51)", // Orange
  "rgb(255, 238, 51)", // Yellow
  "rgb(233, 222, 187)", // Tan
  "rgb(255, 205, 243)", // Pink
  // "rgb(255, 255, 255)", // White
  "rgb(0, 0, 0)" // Black
];
// Pick a random hue and return a [color, bg_color] pair of CSS hsl() strings
// that share the hue and lightness; bg_color has half the saturation.
function get_random_color() {
  var h = Math.round(360 * Math.random());
  // Saturation and lightness are percentages. They were previously written as
  // fractions (0.5, 0.25) and emitted as "0.5%" / "0.25%", which renders as
  // near-black regardless of hue.
  var s = 50;
  var bg_s = 25;
  var l = 25;
  var color = 'hsl(' + h + ', ' + s + '%, ' + l + '%)';
  var bg_color = 'hsl(' + h + ', ' + bg_s + '%, ' + l + '%)';
  return [color, bg_color];
}
// Build num_colors foreground/background color pairs with hues spaced around
// the color wheel in steps of round(360 / num_colors).
//
//   num_colors - how many pairs to generate.
//   returns    - { foreground: [...], background: [...] }, two parallel arrays
//                of CSS hsl() strings; both are empty when num_colors is 0.
function get_colors(num_colors) {
  var h_step = Math.round(360 / num_colors);
  var colors = {
    foreground: [],
    background: []
  };
  var h = 0;
  // Foreground is more saturated and darker than the matching background.
  var fg_s = 50;
  var bg_s = 25;
  var fg_l = 60;
  var bg_l = 80;
  for (var i = 0; i < num_colors; i++) {
    var fg_color = 'hsl(' + h + ', ' + fg_s + '%, ' + fg_l + '%)';
    var bg_color = 'hsl(' + h + ', ' + bg_s + '%, ' + bg_l + '%)';
    colors.foreground.push(fg_color);
    colors.background.push(bg_color);
    h += h_step;
  }
  return colors;
}
// Draw an image and its annotated boxes/captions into a canvas context.
//
//   ctx         - 2D canvas context to draw into.
//   image_url   - URL of the image to load and draw.
//   data        - annotation object; this function reads data.boxes (entries
//                 indexed as [x, y, w, h]) and data.captions (one per box).
//   pos         - {x, y, w, h} rectangle, in canvas coordinates, that the
//                 image should occupy.
//   num_to_show - maximum number of boxes to draw.
//
// Drawing happens asynchronously, once the image has loaded.
function draw_image(ctx, image_url, data, pos, num_to_show) {
  // Copy pos: it is not used until the image loads, and the caller may have
  // changed its object by then.
  var target = { x: pos.x, y: pos.y, w: pos.w, h: pos.h };
  var img = new Image();
  img.onload = function() {
    ctx.drawImage(img, target.x, target.y, target.w, target.h);

    // Factors converting image-space coordinates to canvas coordinates.
    var scale_x = target.w / img.width;
    var scale_y = target.h / img.height;
    for (var i = 0; i < num_to_show && i < data.boxes.length; i++) {
      var box = data.boxes[i];
      // NOTE(review): an earlier comment said the input boxes are 1-indexed,
      // but no -1 offset is applied here -- confirm against the data source.
      var x = box[0] * scale_x + target.x;
      var y = box[1] * scale_y + target.y;
      var w = box[2] * scale_x;
      var h = box[3] * scale_y;
      var color = WAD_COLORS[i % WAD_COLORS.length];

      // Draw the box. strokeRect is used rather than rect so no path segments
      // pile up from one frame to the next.
      ctx.lineWidth = BOX_LINE_WIDTH;
      ctx.strokeStyle = color;
      ctx.strokeRect(x, y, w, h);

      // Now draw the text on a half-transparent label in the box color.
      ctx.font = '18px sans-serif';
      ctx.textBaseline = 'top';
      var text_width = ctx.measureText(data.captions[i]).width;
      ctx.save();
      ctx.globalAlpha = 0.5;
      ctx.fillStyle = color;
      ctx.fillRect(x, y, text_width, 20);
      // restore() puts back the opacity and fill style that were active before
      // the label, so the transparency does not leak into the text or into
      // later frames.
      ctx.restore();
      ctx.fillText(data.captions[i], x, y);
    }
  };
  img.src = image_url;
}
// Draw up to num_to_show captions as a vertical stack of outlined, filled
// rows inside the rectangle `pos`, and empty the #paragraph-div element.
//
//   ctx         - 2D canvas context to draw into.
//   pos         - {x, y, w, h} rectangle, in canvas coordinates, for the stack.
//   captions    - array of caption strings.
//   num_to_show - number of rows the rectangle's height is divided into.
function draw_text(ctx, pos, captions, num_to_show) {
  // Row height: pos.h split into num_to_show rows with TEXT_BOX_PAD between.
  var th = (pos.h - (num_to_show - 1) * TEXT_BOX_PAD) / num_to_show;
  var by = pos.y;
  $('#paragraph-div').empty();
  var colors = get_colors(num_to_show);
  for (var i = 0; i < num_to_show && i < captions.length; i++) {
    ctx.save();
    // Row background and outline. fillRect / strokeRect draw immediately, so
    // no path is shared between rows.
    ctx.strokeStyle = WAD_COLORS[i % WAD_COLORS.length];
    ctx.fillStyle = colors.background[i];
    ctx.lineWidth = 3;
    ctx.fillRect(pos.x, by, pos.w, th);
    ctx.strokeRect(pos.x, by, pos.w, th);

    // Caption text, vertically centered in its row. The baseline is measured
    // from the row's own top (by), so a non-zero pos.y is honored.
    ctx.font = Math.round(0.5 * th) + 'px sans-serif';
    ctx.textBaseline = 'middle';
    ctx.fillStyle = '#000000';
    ctx.fillText(captions[i], pos.x + TEXT_BOX_PAD, by + th / 2);
    ctx.restore();

    by += th + TEXT_BOX_PAD;
  }
}
// Entry point of the results viewer: looks up the visible #canvas, creates an
// off-screen buffer canvas, and defines refresh(), which requests the current
// annotation JSON and draws the matching image.
// NOTE(review): in the text below the closing braces for text_pos, render,
// data_handler, refresh and main are absent, and nothing visible calls
// render(), refresh() or main(). The intended nesting of the trailing
// $.getJSON / setTimeout statements must be confirmed before relying on it.
function main() {
// Request counter; appended as ?id=<t> to both URLs in refresh()
// (presumably to defeat browser caching -- confirm).
var t = 0;
var canvas = document.getElementById('canvas');
var ctx = canvas.getContext('2d');
// Off-screen canvas; its only use below is the width reset in render() and a
// commented-out drawImage.
var buf_canvas = document.createElement('canvas');
var buf_ctx = buf_canvas.getContext('2d');
// Builds fresh image/JSON URLs for this tick and requests the JSON.
function refresh() {
t += 1;
var image_url = IMAGE_URL + '?id=' + t;
var json_url = JSON_URL + '?id=' + t;
// Receives the parsed JSON; reads data.width / data.height to keep the
// image's aspect ratio at a fixed display width.
function data_handler(data) {
var display_width = 1200;
var num_to_show = 20;
var display_height = display_width / data.width * data.height;
var img_pos = {x: 0, y: 0, w: display_width, h: display_height};
// Column to the right of the image; only used by the commented-out
// draw_text call below.
var text_pos = {
x: display_width + 10, y: 0,
w: 400, h: display_height,
// Draws the frame straight onto the visible canvas context.
function render() {
// Re-assigning a canvas's width resets (clears) that canvas.
buf_canvas.width = buf_canvas.width;
draw_image(ctx, image_url, data, img_pos, num_to_show);
// draw_text(ctx, text_pos, data.captions, num_to_show);
// ctx.drawImage(buf_canvas, 0, 0);
// Fetch the annotations; data_handler runs when they arrive.
$.getJSON(json_url, data_handler);
// Schedule refresh after 50 ms.
window.setTimeout(refresh, 50);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment