Hand landmarks and gesture recognition in Julia
### A Pluto.jl notebook ###
# v0.17.3
using Markdown
using InteractiveUtils
# This Pluto notebook uses @bind for interactivity. When running this notebook outside of Pluto, the following 'mock version' of @bind gives bound variables a default value (instead of an error).
macro bind(def, element)
quote
local iv = try Base.loaded_modules[Base.PkgId(Base.UUID("6e696c72-6542-2067-7265-42206c756150"), "AbstractPlutoDingetjes")].Bonds.initial_value catch; b -> missing; end
local el = $(esc(element))
global $(esc(def)) = Core.applicable(Base.get, el) ? Base.get(el) : iv(el)
el
end
end
# ╔═╡ 4934e4de-b03d-419f-a076-9a8116f5ddf5
begin
using Pkg;
Pkg.activate(".") ;
end
# ╔═╡ 14519106-d4cf-4a77-acca-a22b7c426334
using Cairo, Images, ImageDraw, Luxor, LinearAlgebra, LazySets, StaticArrays
# ╔═╡ 0da24d63-180f-4913-a8d6-3ba54a28ef04
md"""
### Basic Hand Gesture Recognition using convexity defects
We want to detect gestures from hands using classical image processing methods at this moment
###### Steps
- We recieve real time image from Javascript
- Define region of interest in the image and get that part of image
- Convert ROI image to HSV space, then threshold it to skin range, we recieve a binary mask
- We use map window and dilation on the mask to reduce noise(quite robust actually)
- Find contours from the mask now(suzuki and abe algorithm)
- Find convexhull on the binary mask to find the convex hull(from ImageMorphology.jl)
- Now we have contour points and convexhull of the mask, we find the convexity defects
###### Convexity Defects wasn't available in julia so wrote my own
Explanation and Interesting ideas on convexity defects will be done later
- After we find num and location of convexity defects, we plot the points and num+1 as the number in image using Luxor.jl
- This happens every 100ms
"""
# ╔═╡ 841cd0d1-c5d4-41fe-949f-b2ddc9144634
md"""
![](https://i.imgur.com/KQ5V3hJ.png)
"""
# ╔═╡ 43f08085-b9b3-4e9b-b2ff-a0907b48a897
begin
### Important
### contour related utils
# rotate direction clockwise
function clockwise(dir)
return (dir)%8 + 1
end
# rotate direction counterclockwise
function counterclockwise(dir)
return (dir+6)%8 + 1
end
# move from current pixel to next in given direction
function move(pixel, image, dir, dir_delta)
newp = pixel + dir_delta[dir]
height, width = size(image)
if (0 < newp[1] <= height) && (0 < newp[2] <= width)
if image[newp]!=0
return newp
end
end
return CartesianIndex(0, 0)
end
# finds direction between two given pixels
function from_to(from, to, dir_delta)
delta = to-from
return findall(x->x == delta, dir_delta)[1]
end
function detect_move(image, p0, p2, nbd, border, done, dir_delta)
dir = from_to(p0, p2, dir_delta)
moved = clockwise(dir)
p1 = CartesianIndex(0, 0)
while moved != dir ## 3.1
newp = move(p0, image, moved, dir_delta)
if newp[1]!=0
p1 = newp
break
end
moved = clockwise(moved)
end
if p1 == CartesianIndex(0, 0)
return
end
p2 = p1 ## 3.2
p3 = p0 ## 3.2
done .= false
while true
dir = from_to(p3, p2, dir_delta)
moved = counterclockwise(dir)
p4 = CartesianIndex(0, 0)
done .= false
while true ## 3.3
p4 = move(p3, image, moved, dir_delta)
if p4[1] != 0
break
end
done[moved] = true
moved = counterclockwise(moved)
end
push!(border, p3) ## 3.4
if p3[1] == size(image, 1) || done[3]
image[p3] = -nbd
elseif image[p3] == 1
image[p3] = nbd
end
if (p4 == p0 && p3 == p1) ## 3.5
break
end
p2 = p3
p3 = p4
end
end
function find_contours(image)
nbd = 1
lnbd = 1
image = Float64.(image)
contour_list = Vector{typeof(CartesianIndex[])}()
done = [false, false, false, false, false, false, false, false]
# Clockwise Moore neighborhood.
dir_delta = [CartesianIndex(-1, 0) , CartesianIndex(-1, 1), CartesianIndex(0, 1), CartesianIndex(1, 1), CartesianIndex(1, 0), CartesianIndex(1, -1), CartesianIndex(0, -1), CartesianIndex(-1,-1)]
height, width = size(image)
for i=1:height
lnbd = 1
for j=1:width
fji = image[i, j]
is_outer = (image[i, j] == 1 && (j == 1 || image[i, j-1] == 0)) ## 1 (a)
is_hole = (image[i, j] >= 1 && (j == width || image[i, j+1] == 0))
if is_outer || is_hole
# 2
border = CartesianIndex[]
from = CartesianIndex(i, j)
if is_outer
nbd += 1
from -= CartesianIndex(0, 1)
else
nbd += 1
if fji > 1
lnbd = fji
end
from += CartesianIndex(0, 1)
end
p0 = CartesianIndex(i,j)
detect_move(image, p0, from, nbd, border, done, dir_delta) ## 3
if isempty(border) ##TODO
push!(border, p0)
image[p0] = -nbd
end
push!(contour_list, border)
end
if fji != 0 && fji != 1
lnbd = abs(fji)
end
end
end
return contour_list
end
# a contour is a vector of CartesianIndex{2} points
function draw_contour(image, color, contour)
for ind in contour
image[ind] = color
end
end
function draw_contours(image, color, contours)
for cnt in contours
draw_contour(image, color, cnt)
end
end
end
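# A quick usage sketch for the contour utilities above (illustrative only):
# trace the border of a small synthetic blob and draw it onto a white canvas.
let
    blob = zeros(Int, 9, 9)
    blob[3:7, 3:7] .= 1                        # a 5x5 square "object"
    cnts = find_contours(blob)                 # Suzuki-Abe border following
    canvas = RGB{N0f8}.(ones(9, 9))            # white RGB canvas
    draw_contours(canvas, RGB(0, 0, 0), cnts)  # contours drawn in black
    canvas
end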
# ╔═╡ f5642319-05ee-4731-ad26-80bcd4f6aa7b
begin
### Important
### Webcam related utils
function camera_input(;max_size=200, default_url="https://i.imgur.com/SUmi94P.png")
"""
<span class="pl-image waiting-for-permission">
<style>
.pl-image.popped-out {
position: fixed;
top: 0;
right: 0;
z-index: 5;
}
.pl-image #video-container {
width: 250px;
}
.pl-image video {
/* border-radius: 1rem 1rem 0 0; */
}
.pl-image.waiting-for-permission #video-container {
display: none;
}
.pl-image #prompt {
display: none;
}
.pl-image.waiting-for-permission #prompt {
width: 250px;
height: 200px;
display: grid;
place-items: center;
font-family: monospace;
font-weight: bold;
text-decoration: underline;
cursor: pointer;
border: 5px dashed rgba(0,0,0,.5);
}
.pl-image video {
display: block;
}
.pl-image .bar {
width: inherit;
display: flex;
z-index: 6;
}
.pl-image .bar#top {
position: absolute;
flex-direction: column;
}
.pl-image .bar#bottom {
background: black;
/* border-radius: 0 0 1rem 1rem; */
}
.pl-image .bar button {
flex: 0 0 auto;
background: rgba(255,255,255,.8);
border: none;
width: 2rem;
height: 2rem;
border-radius: 100%;
cursor: pointer;
z-index: 7;
}
.pl-image .bar button#shutter {
width: 3rem;
height: 3rem;
margin: -1.5rem auto .2rem auto;
}
.pl-image video.takepicture {
animation: pictureflash 0ms linear;
}
@keyframes pictureflash {
0% {
filter: grayscale(1.0) contrast(2.0);
}
100% {
filter: grayscale(0.0) contrast(1.0);
}
}
</style>
<div id="video-container">
<div id="top" class="bar">
<button id="stop" title="Stop video">✖</button>
<button id="pop-out" title="Pop out/pop in">⏏</button>
</div>
<video playsinline autoplay></video>
<div id="bottom" class="bar">
<button id="shutter" title="Click to take a picture">📷</button>
</div>
</div>
<div id="prompt">
<span>
Enable webcam
</span>
</div>
<script>
// based on https://github.com/fonsp/printi-static (by the same author)
const span = currentScript.parentElement
const video = span.querySelector("video")
const popout = span.querySelector("button#pop-out")
const stop = span.querySelector("button#stop")
const shutter = span.querySelector("button#shutter")
const prompt = span.querySelector(".pl-image #prompt")
const maxsize = $(max_size)
const send_source = (source, src_width, src_height) => {
const scale = Math.min(1.0, maxsize / src_width, maxsize / src_height)
const width = Math.floor(src_width * scale)
const height = Math.floor(src_height * scale)
const canvas = html`<canvas width=\${width} height=\${height}>`
const ctx = canvas.getContext("2d")
ctx.drawImage(source, 0, 0, width, height)
span.value = {
width: width,
height: height,
data: ctx.getImageData(0, 0, width, height).data,
}
span.dispatchEvent(new CustomEvent("input"))
}
const clear_camera = () => {
window.stream.getTracks().forEach(s => s.stop());
video.srcObject = null;
span.classList.add("waiting-for-permission");
}
prompt.onclick = () => {
navigator.mediaDevices.getUserMedia({
audio: false,
video: {
facingMode: "environment",
},
}).then(function(stream) {
stream.onend = console.log
window.stream = stream
video.srcObject = stream
window.cameraConnected = true
video.controls = false
video.play()
video.controls = false
span.classList.remove("waiting-for-permission");
}).catch(function(error) {
console.log(error)
});
}
stop.onclick = () => {
clear_camera()
}
popout.onclick = () => {
span.classList.toggle("popped-out")
}
var intervalId = window.setInterval(function(){
const cl = video.classList
cl.remove("takepicture")
void video.offsetHeight
cl.add("takepicture")
video.play()
video.controls = false
send_source(video, video.videoWidth, video.videoHeight)
}, 150);
shutter.onclick = () => {
const cl = video.classList
cl.remove("takepicture")
void video.offsetHeight
cl.add("takepicture")
video.play()
video.controls = false
send_source(video, video.videoWidth, video.videoHeight)
}
document.addEventListener("visibilitychange", () => {
if (document.visibilityState != "visible") {
clear_camera()
}
})
// Set a default image
const img = html`<img crossOrigin="anonymous">`
img.onload = () => {
console.log("helloo")
send_source(img, img.width, img.height)
}
img.src = "$(default_url)"
console.log(img)
</script>
</span>
""" |> HTML
end
function process_raw_camera_data(raw_camera_data)
# the raw image data is a long byte array, we need to transform it into something
# more "Julian" - something with more _structure_.
# The encoding of the raw byte stream is:
# every 4 bytes is a single pixel
# every pixel has 4 values: Red, Green, Blue, Alpha
# (we ignore alpha for this notebook)
# So to get the red values for each pixel, we take every 4th value, starting at
# the 1st:
reds_flat = UInt8.(raw_camera_data["data"][1:4:end])
greens_flat = UInt8.(raw_camera_data["data"][2:4:end])
blues_flat = UInt8.(raw_camera_data["data"][3:4:end])
# but these are still 1-dimensional arrays, nicknamed 'flat' arrays
# We will 'reshape' this into 2D arrays:
width = raw_camera_data["width"]
height = raw_camera_data["height"]
# reshape to (width, height) and transpose to get the usual image orientation
reds = reshape(reds_flat, (width, height))' / 255.0
greens = reshape(greens_flat, (width, height))' / 255.0
blues = reshape(blues_flat, (width, height))' / 255.0
# we have our 2D array for each color
# Let's create a single 2D array, where each value contains the R, G and B value of
# that pixel
RGB.(reds, greens, blues)
end
end
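# Sanity sketch of the RGBA decoding above with a fake 2x2 "camera" payload
# (hypothetical data, 4 bytes per pixel, alpha ignored), run through the same
# reshape/transpose path as the real webcam frames.
let
    fake = Dict(
        "width"  => 2,
        "height" => 2,
        # four pixels: red, green, blue, white (R, G, B, A each)
        "data"   => UInt8[255, 0, 0, 255,  0, 255, 0, 255,
                          0, 0, 255, 255,  255, 255, 255, 255],
    )
    process_raw_camera_data(fake)   # -> 2x2 matrix of RGB values
end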
# ╔═╡ 1a0324de-ee19-11ea-1d4d-db37f4136ad3
@bind raw_camera_data camera_input(;max_size=100)
# ╔═╡ 6f80e4ff-99bc-4c77-aebe-5e7f21f0d328
begin
function drawdots!(img, res, color )
for i in res
img[i[1]-1:i[1]+1, i[2]-1:i[2]+1] .= color
end
end
function dist2p(p1, p2)
sqrt((p1[1]-p2[1])^2 + (p1[2]-p2[2])^2)
end
function findmyangle(a1, a2; center)
acos((dist2p(a1,center)^2 + dist2p(a2,center)^2 - dist2p(a1,a2)^2) / (2 * dist2p(center, a1) * dist2p(center, a2)))
end
end
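# `findmyangle` is the law of cosines: the angle at `center` subtended by a1 and
# a2 is acos((|c-a1|² + |c-a2|² - |a1-a2|²) / (2 |c-a1| |c-a2|)).
# A quick sanity check (illustrative): a right angle at the origin.
findmyangle((1, 0), (0, 1); center = (0, 0)) ≈ π / 2   # true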
# ╔═╡ ca92aa75-50c5-4720-a0d5-6993c21ea0b1
"""
findconvexitydefects(contour, convhull; dist = 1.1, absdiff = 10, mindist = 0, currsize = 50, d1 = 2, d2 = 2, anglemax = π/2)
return the convexity defects using contour points and convexhull
###### Arguments
- contour -> contour points using suzuki and abe algorithm
- convexhull -> convexhull boundaries found from ImageMorphology.jl
- dist and absdiff -> helps in edge cases when trying to synchronize contour points and convexhull
- mindist -> to control min distance of a defect from a convex hull region line
- currsize -> to avoid small regions of contours
- d1 and d2 -> to control of defect from pair of convexhull points forming line individually
- anglemax -> helps to control max angle between convex hull line points and the defect
Idea | For one region
:-------------------------:|:-------------------------:
![](https://i1.wp.com/theailearner.com/wp-content/uploads/2020/11/hand_hull1.jpg?resize=624%2C438&ssl=1) | ![](https://i0.wp.com/theailearner.com/wp-content/uploads/2020/11/conv_def1.jpg?w=489&ssl=1)
"""
function findconvexitydefects(
contour,
convhull;
dist=1.1,
absdiff=10,
mindist=0,
currsize= 50,
d1 = 2,
d2 = 2,
anglemax = π/2
)
# first we need to match our contour points to our convexhull regions
numindices = []
previous = 0
for i in convhull
for (num,j) in enumerate(contour)
if norm(Tuple(i) .- Tuple(j)) < dist && abs(previous-num) > absdiff # to avoid small and very close regions
push!(numindices, num)
previous = num
break
end
end
end
# we want numindices to line up with our convexhull points,
# to define a region of interest for each convexhull line
defects = Vector{CartesianIndex{2}}([]) # indices with defects
# in case numindices < convexhull indices,
# meaning we don't have regions for all lines
if size(numindices)[1] < size(convhull)[1]
throw(error("Raise the range dist: fewer numindices points than convexhull points, $(size(numindices)[1]) vs $(size(convhull)[1])"))
end
# iterate over each consecutive pair of convhull points to form line
for i in 1:size(convhull)[1]-1
# to handle the wrap-around case where numindices go e.g. 1256, then 1
if numindices[i] > numindices[i+1]
curr = vcat(contour[numindices[i]: end], contour[1: numindices[i+1]])
else
# general case: take the contour points for this convexhull region
curr = contour[numindices[i]:numindices[i+1]]
end
# to remove minor regions of contours, we can set currsize
if size(curr)[1] < currsize
continue
end
# Defining the line
p1 = Float64.(Tuple(convhull[i])) # point 1
p2 = Float64.(Tuple(convhull[i+1])) # point 2
line = Line(;from=[p1[1], p1[2]], to=[p2[1], p2[2]])
maxdef = 0 # max distance from our convhull line
defloc = CartesianIndex(0,0) # location of that point
# for each contour point in this convexhull region, compute its
# distance to the line and track the maximum
for j in curr
p = SA[j[1], j[2]]
lpdist = LazySets.distance(p, line) # find distance
# update if we find new max
if lpdist > maxdef
maxdef = lpdist
defloc = j
end
end
def1 = norm(Tuple(defloc) .- Tuple(p1))
def2 = norm(Tuple(defloc) .- Tuple(p2))
# compute the angle at the defect and apply the mindist/d1/d2/anglemax filters
angle = findmyangle(p1,p2; center=defloc)
if maxdef > mindist && def1 > d1 && def2 > d2 && angle < anglemax
push!(defects, defloc)
end
end
defects
end
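# Hedged usage sketch: a hypothetical helper (not part of the notebook) showing
# how the pieces compose, mirroring the call made inside `objecttracker` below;
# `mask` is a binary hand mask and the parameter values are illustrative.
function count_fingers(mask)
    cnts = find_contours(mask)                    # Suzuki-Abe contours
    hull = convexhull(mask)
    push!(hull, hull[1])                          # close the hull polygon
    defects = findconvexitydefects(cnts[1], hull;
                                   dist = 3, absdiff = 2, currsize = 30, mindist = 6)
    return length(defects) + 1                    # defects + 1 ≈ finger count
end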
# ╔═╡ dfb7c6be-ee0d-11ea-194e-9758857f7b20
begin
### Important
### Object tracker is here
function objecttracker(
img,
h = 50,
s = 255,
v = 255,
lh = 0,
ls = 20,
lv = 70,
boundingboxvar = 10
)
hsv_img = HSV.(img)
channels = channelview(float.(hsv_img))
hue_img = channels[1, :, :]
val_img = channels[3, :, :]
satur_img = channels[2, :, :]
mask = zeros(size(hue_img))
h1, s1, v1 = lh, ls, lv
ex = boundingboxvar
for ind in eachindex(hue_img)
if hue_img[ind] <= h && satur_img[ind] <= s / 255 && val_img[ind] <= v / 255
if hue_img[ind] >= h1 && satur_img[ind] >= s1 / 255 && val_img[ind] >= v1 / 255
mask[ind] = 1
end
end
end
img = mapwindow(ImageFiltering.median, dilate(mask), (3, 3))
contours = find_contours(img)
try
convhull = convexhull(img .> 0.5)
push!(convhull, convhull[1])
res = findconvexitydefects(contours[1], convhull; dist=3, absdiff = 2, currsize= 30, mindist =6)
img_convex1 = RGB{N0f8}.(ones(size(img)))
drawdots!(img_convex1, res, RGB(0,0,1))
draw!(img_convex1, ImageDraw.Path(convhull), RGB(0))
draw_contours(img_convex1, RGB(0), contours)
return img_convex1, size(res)[1]
catch e
img_convex1 = RGB{N0f8}.(ones(size(img)))
draw_contours(img_convex1, RGB(0), contours)
return img_convex1 , -1
end
end;
end
# ╔═╡ 594acafd-01d4-4eee-b9e6-5b886953b5b1
begin
image = process_raw_camera_data(raw_camera_data);
img, num = objecttracker(image[:,1:70])
z = convert(Array{RGB24},img')
img = CairoImageSurface(z)
Drawing(img.width, img.height, :png)
placeimage(img, 0, 0)
sethue("red")
fontsize(10)
if num != -1
Luxor.text("$(num + 1)", Luxor.Point(10, 10), halign=:center)
end
image_as_matrix()
end
# ╔═╡ 0814234d-459a-404b-9253-7f7665ea6a38
# begin
# Pkg.add(PackageSpec(url="https://github.com/Pocket-titan/DarkMode"))
# import DarkMode
# DarkMode.enable()
# end
# ╔═╡ f2236406-af64-403d-84bd-e3afe395b791
html"""<style>
main {
max-width: 900px;
}
"""
# ╔═╡ Cell order:
# ╟─0da24d63-180f-4913-a8d6-3ba54a28ef04
# ╟─ca92aa75-50c5-4720-a0d5-6993c21ea0b1
# ╠═14519106-d4cf-4a77-acca-a22b7c426334
# ╟─dfb7c6be-ee0d-11ea-194e-9758857f7b20
# ╟─1a0324de-ee19-11ea-1d4d-db37f4136ad3
# ╠═594acafd-01d4-4eee-b9e6-5b886953b5b1
# ╟─841cd0d1-c5d4-41fe-949f-b2ddc9144634
# ╟─43f08085-b9b3-4e9b-b2ff-a0907b48a897
# ╟─f5642319-05ee-4731-ad26-80bcd4f6aa7b
# ╟─6f80e4ff-99bc-4c77-aebe-5e7f21f0d328
# ╟─0814234d-459a-404b-9253-7f7665ea6a38
# ╟─f2236406-af64-403d-84bd-e3afe395b791
# ╟─4934e4de-b03d-419f-a076-9a8116f5ddf5
# Hand landmark detection using OpenCV.jl and MediaPipe
# add OpenCV.jl and PyCall.jl using the package manager
using OpenCV # Julia's OpenCV binding
using PyCall # used to call Python from inside Julia
cap = OpenCV.VideoCapture(Int32(0)) # To open the webcam capture stream
OpenCV.namedWindow("Hand Landmarks detection") # create a window for the output
# python code to call mediapipe
py"""
# install mediapipe and numpy in your Python environment
import mediapipe  # does the processing of the image for the hand landmark detection
import numpy as np

drawingModule = mediapipe.solutions.drawing_utils  # used to draw the hand landmarks
handsModule = mediapipe.solutions.hands            # used to detect the hand

def process_image(img):
    # deep copy of the image
    vis = np.array([x for x in img])
    # to get the hand keypoints
    with handsModule.Hands() as hands:
        # processing of the image
        results = hands.process(vis)
        # if any hand landmarks were detected
        if results.multi_hand_landmarks is not None:
            # for each hand, draw the landmarks
            for handLandmarks in results.multi_hand_landmarks:
                # draw on the image
                drawingModule.draw_landmarks(vis, handLandmarks, handsModule.HAND_CONNECTIONS)
    # return the image
    return vis
"""
imgfinal = OpenCV.Mat(zeros(UInt8, (3,640,480))) # create a matrix to store the final image
process_image = py"process_image" # assigning the python function to a julian name
while true
ret, img = OpenCV.read(cap) # read the image from the webcam
# img = reverse(img, dims = 2) # flips the image horizontally, but slows things down
# to handle case if webcam stopped
if ret==false
print("Webcam stopped")
break
end
# rearrange the image for Python: OpenCV's (3, x, y) BGR layout -> (x, y, 3) RGB
input = cat(img[3,:,:], img[2,:,:], img[1,:,:]; dims=3)
# calling the python function
output = process_image(input);
# rearrange the output back to OpenCV's (3, x, y) BGR layout
imgfinal[1,:,:] = output[:,:,3]
imgfinal[2,:,:] = output[:,:,2]
imgfinal[3,:,:] = output[:,:,1]
# displaying the image
OpenCV.imshow("Hand Landmarks detection", imgfinal)
if OpenCV.waitKey(Int32(5))==27
break
end
end
# release the webcam stream
OpenCV.release(cap)
# to release all the windows and close them
OpenCV.destroyAllWindows()