@berak
Last active September 10, 2020 10:37
depth from single image / www
code running at https://berak.pythonanywhere.com/
run get_model.sh to download the pretrained 'unet.onnx'
wsgi.py is a small WSGI web server, receiving images of indoor scenes and sending back depth images
up.html is the main web page
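
For reference, the service can also be exercised without the browser UI. Below is a minimal client sketch (not part of the gist; file names like room.png and depth_preview.png are placeholders) that posts a base64-encoded PNG to the /up endpoint the same way up.html does, and saves the returned depth preview:

# client_sketch.py - hypothetical helper, mimics what up.html does:
# POST a 'data:image/png;base64,...' body to /up and decode the data URL that comes back.
import base64
import urllib.request

SERVER = "https://berak.pythonanywhere.com"     # or a local instance

with open("room.png", "rb") as f:               # placeholder input image
    payload = b"data:image/png;base64," + base64.b64encode(f.read())

req = urllib.request.Request(
    SERVER + "/up", data=payload,
    headers={"Content-Type": "application/x-www-form-urlencoded"})
with urllib.request.urlopen(req) as r:
    answer = r.read()                           # again a 'data:image/png;base64,...' body

with open("depth_preview.png", "wb") as f:
    f.write(base64.b64decode(answer.split(b",", 1)[1]))

The full-resolution 16-bit depth map and the zipped point cloud can then be fetched from /depth.png and /depth.ply.gz, as linked in the web page.
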
#!/bin/bash
# get_model.sh - download the pretrained 'unet.onnx' from Google Drive
fileid="14n3cTmaKGJHIhL0sBRaByuyPBDyntHZm"
filename="unet.onnx"
# first request only collects the download-confirmation cookie
curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null
# second request sends the confirmation token and stores the model file
curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename}
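
As a quick sanity check (a sketch, assuming the download succeeded and opencv-python with ONNX support is installed), the model can be loaded and run on a dummy 1x3x256x320 blob, the same input shape wsgi.py feeds it:

# check_model.py - hypothetical sanity check for the downloaded unet.onnx
import cv2
import numpy as np

net = cv2.dnn.readNet("unet.onnx")
dummy = np.random.rand(1, 3, 256, 320).astype(np.float32)   # NCHW, as in wsgi.py
net.setInput(dummy)
out = net.forward()
print("output shape:", out.shape)
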
<!DOCTYPE html>
<html>
<head>
<title> Depth from single image </title>
<style>
  .main {
    font-family: Arial, "MS Trebuchet", sans-serif; font-size: 12px;
    border: 0;
    margin-top: 15px; margin-bottom: 15px; margin-right: 15px; margin-left: 15px;
  }
  p, .bordered {
    margin-top: 5px; margin-bottom: 5px; margin-right: 5px; margin-left: 5px;
  }
  .bordered {
    border-color: #777;
    border-style: solid;
  }
</style>
<script type="text/javascript">
  var ticks = 0;
  function postCanvasToURL(url) { // this is the actual workhorse
    var err = document.getElementById("err");
    err.innerHTML = "... posting image";
    var type = "image/png";
    var data = document.getElementById("output").toDataURL(type);
    var xhr = new XMLHttpRequest();
    xhr.open('POST', url, true);
    xhr.setRequestHeader('Content-Type', "application/x-www-form-urlencoded");
    xhr.onreadystatechange = function(e) {
      if (this.readyState > 3) {
        err.innerHTML = "<p>&nbsp;<a href='/depth.png'>16bit depth png</a>";
        err.innerHTML += "<p>&nbsp;<a href='/depth.ply.gz'>zipped point cloud (ply.gz)</a><br>";
        render(this.response, "result");
      }
    };
    // strip the data-URL prefix, the server only needs the base64 payload
    data = data.replace('data:' + type + ';base64,', '');
    xhr.send(data);
  }
  function render(src, dst) {
    var image = new Image();
    image.onload = function() {
      var canvas = document.getElementById(dst);
      var MAX_W = 640;
      var MAX_H = 480;
      if (image.height > MAX_H || image.width > MAX_W) {
        // scale uniformly so the image fits into MAX_W x MAX_H
        var scale = (image.width < image.height) ?
          MAX_H / image.height :
          MAX_W / image.width;
        image.height *= scale;
        image.width *= scale;
      }
      var ctx = canvas.getContext("2d");
      ctx.clearRect(0, 0, canvas.width, canvas.height);
      canvas.width = image.width;
      canvas.height = image.height;
      ctx.drawImage(image, 0, 0, image.width, image.height);
    };
    image.src = src;
  }
  function _load_image(src, dst) {
    if (!src.type.match(/image.*/)) {
      console.log("The dropped file is not an image: ", src.type);
      return;
    }
    var reader = new FileReader();
    reader.onload = function(e) {
      render(e.target.result, dst);
    };
    reader.readAsDataURL(src);
  }
  function loadImageDrag(src) {
    _load_image(src, "output");
  }
  function loadImageFile() {
    var up = document.getElementById("upload");
    var oFile = up.files[0];
    _load_image(oFile, "output");
  }
</script>
</head>
<body class="main">
<h2> Depth from a single image</h2>
<div id="droparea">
<canvas id="output" width="320" height="240" class="bordered" title="drop an image here"></canvas>
<canvas id="result" width="320" height="240" class="bordered"></canvas>
<div>
<p>Drop files here, or</p>
<p> <input id="upload" type="file" multiple onchange="loadImageFile();"/></p>
<p> <input id="uploadbtn" type="button" value="Generate Depth Image" onclick="postCanvasToURL('/up');"/></p>
<p> <div id="err"></div></p>
</div>
</div>
<p><br></p>
<font color="#999">
<p><a href='https://github.com/karoly-hars/DE_resnet_unet_hyb'>DE_resnet_unet_hyb</a> trained on <a href="https://cs.nyu.edu/~silberman/datasets/nyu_depth_v2.html">interior room images</a>
<p>running on OpenCV's dnn ;)
</font>
<script type="text/javascript">
  var target = document.getElementById("droparea");
  target.addEventListener("dragover", function(e){ e.preventDefault(); }, true);
  target.addEventListener("drop", function(e){
    e.preventDefault();
    loadImageDrag(e.dataTransfer.files[0]);
  }, true);
</script>
</body>
</html>
#!/usr/bin/python
import os, base64
import cv2, numpy as np
import math
import gzip

# network input size and ImageNet normalization constants
HEIGHT = 256
WIDTH = 320
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
net = cv2.dnn.readNet("unet.onnx")
ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
def write_ply(img, depth, scale=1.0/255):  # scale for 16bit !
    w = img.shape[1]
    h = img.shape[0]
    depth = cv2.resize(depth, (w, h), interpolation=cv2.INTER_CUBIC)
    with gzip.open("depth.ply.gz", 'wb') as f:
        total = w * h
        f.write((ply_header % dict(vert_num=total)).encode('utf-8'))
        for y in range(h):
            for x in range(w):
                pix = img[y, x]
                z = scale * depth[y, x]
                s = b'%f %f %f %d %d %d\n' % (x, y, z, pix[2], pix[1], pix[0])
                f.write(s)
def depth_to_grayscale(depth, max_dist=10.0):
    """Transform a prediction into a 16-bit grayscale image (0..max_dist maps to 0..255*255)."""
    depth = np.transpose(depth, (1, 2, 0))
    depth[depth > max_dist] = max_dist
    depth = depth / max_dist
    depth = np.array(depth * 255.0 * 255, dtype=np.uint16)
    depth = cv2.resize(depth, (WIDTH, HEIGHT))
    depth_img = np.clip(depth, 0, 255 * 255)
    return depth_img
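
# Sketch, not part of the original gist: inverting depth_to_grayscale() for consumers of
# the saved 16-bit depth.png. That function maps 0..max_dist to 0..255*255, so dividing by
# 65025 and multiplying by max_dist (10.0 by default, see above) recovers the predicted
# depth values.
def grayscale_to_depth(depth_png, max_dist=10.0):
    """Hypothetical inverse of depth_to_grayscale(): 16-bit values -> predicted depth."""
    return depth_png.astype(np.float32) / (255.0 * 255.0) * max_dist
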
def scale_image(img, scale=None):
    """Resize/scale an image. If no scale is given, scale so the image covers at least HEIGHT x WIDTH."""
    if scale is None:
        scale = max(WIDTH / img.shape[1], HEIGHT / img.shape[0])
    new_size = (math.ceil(img.shape[1] * scale), math.ceil(img.shape[0] * scale))
    image = cv2.resize(img, new_size, interpolation=cv2.INTER_NEAREST)
    return image

def center_crop(img):
    """Center crop an image to HEIGHT x WIDTH."""
    corner = ((img.shape[0] - HEIGHT) // 2, (img.shape[1] - WIDTH) // 2)
    img = img[corner[0]:corner[0] + HEIGHT, corner[1]:corner[1] + WIDTH]
    return img
def process(org):
    inWidth = org.shape[1]
    inHeight = org.shape[0]
    # preprocess: resize/crop to WIDTH x HEIGHT, normalize with ImageNet mean/std, HWC -> NCHW
    img = scale_image(org)
    img = center_crop(img)
    img = img.astype(np.float32) / 255.0
    img -= mean
    img /= std
    img = img.transpose(2, 0, 1)
    blob = img.reshape(1, img.shape[0], img.shape[1], img.shape[2])
    net.setInput(blob)
    res = net.forward()
    # save the 16-bit depth map and the point cloud, return an 8-bit preview at the input size
    res = depth_to_grayscale(res[0, :, :, :])
    cv2.imwrite("depth.png", res)
    write_ply(org, res)
    draw = (res / 255).astype(np.uint8)
    return cv2.resize(draw, (inWidth, inHeight))
def _read(fname):
    with open(fname, "rb") as f:
        return f.read()
def application(environ, start_response):
    request_body = None
    retcode = '200 OK'
    resp = b"dummy\r\n"
    ct = "text/html"
    try:
        request_body_size = int(environ.get('CONTENT_LENGTH', 0))
        request_body = environ['wsgi.input'].read(request_body_size)
    except ValueError:
        resp = b"no response"
    url = environ['PATH_INFO']
    if url == "/":
        resp = _read("up.html")
    elif url == "/dn":
        ct = 'image/png'
        resp = _read("my.png")
    elif url == "/depth.png":
        ct = 'image/png'
        resp = _read("depth.png")
    elif url == "/depth.ply.gz":
        ct = 'application/gzip'
        resp = _read("depth.ply.gz")
    elif url == "/up" and request_body:
        ct = 'image/png'
        # the browser sends a base64 data URL; strip the prefix and decode the PNG
        resp = request_body.replace(b'data:' + ct.encode('ascii') + b';base64,', b"")
        data = base64.b64decode(resp)
        buf = np.frombuffer(data, dtype=np.uint8)
        img = cv2.imdecode(buf, 1)
        img = process(img)
        cv2.imwrite("my.png", img)
        # answer with a base64 data URL of the 8-bit depth preview
        ok, enc = cv2.imencode(".png", img)
        resp = base64.b64encode(enc.tobytes())
        resp = b'data:' + ct.encode('ascii') + b';base64,' + resp
    start_response(retcode, [('Content-Type', ct), ('Content-Length', str(len(resp)))])
    return [resp]
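
For local testing, a small launcher (a sketch; the public instance on pythonanywhere uses its own WSGI hosting instead) can serve the application callable with Python's built-in wsgiref server:

# run_local.py - hypothetical launcher, assumes the file above is saved as wsgi.py
from wsgiref.simple_server import make_server
from wsgi import application

if __name__ == "__main__":
    with make_server("", 8000, application) as httpd:
        print("serving on http://localhost:8000/")
        httpd.serve_forever()

Opening http://localhost:8000/ then serves up.html, and the Generate Depth Image button posts to /up just like on the public instance.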