Skip to content

Instantly share code, notes, and snippets.

@vardanagarwal
Created July 24, 2020 23:57
Show Gist options
  • Save vardanagarwal/1b00a6ac3f5bd22f853ebe3b4eab88ed to your computer and use it in GitHub Desktop.
"""Human facial landmark detector based on Convolutional Neural Network."""
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
import math
class FaceDetector:
    """Detect human faces in an image with OpenCV's Caffe SSD face detector."""

    def __init__(self,
                 dnn_proto_text='models/deploy.prototxt',
                 dnn_model='models/res10_300x300_ssd_iter_140000.caffemodel'):
        """Load the Caffe face-detection network from disk.

        Args:
            dnn_proto_text: path to the network architecture (.prototxt).
            dnn_model: path to the trained weights (.caffemodel).
        """
        self.face_net = cv2.dnn.readNetFromCaffe(dnn_proto_text, dnn_model)
        # Cached result of the last get_faceboxes() call:
        # a two-element list [faceboxes, confidences].
        self.detection_result = None

    def get_faceboxes(self, image, threshold=0.5):
        """Get the bounding boxes of faces in `image` using the DNN.

        Args:
            image: BGR image as a numpy array of shape (rows, cols, channels).
            threshold: minimum detection confidence to keep a box.

        Returns:
            (confidences, faceboxes) where faceboxes are pixel-coordinate
            [x_min, y_min, x_max, y_max] lists. Also caches both in
            self.detection_result.
        """
        rows, cols, _ = image.shape
        confidences = []
        faceboxes = []
        # The SSD was trained on 300x300 inputs with these BGR channel means.
        self.face_net.setInput(cv2.dnn.blobFromImage(
            image, 1.0, (300, 300), (104.0, 177.0, 123.0), False, False))
        detections = self.face_net.forward()
        for result in detections[0, 0, :, :]:
            confidence = result[2]
            if confidence > threshold:
                # The network outputs normalized [0, 1] coordinates;
                # scale them back to pixel coordinates.
                x_left_bottom = int(result[3] * cols)
                y_left_bottom = int(result[4] * rows)
                x_right_top = int(result[5] * cols)
                y_right_top = int(result[6] * rows)
                confidences.append(confidence)
                faceboxes.append(
                    [x_left_bottom, y_left_bottom, x_right_top, y_right_top])
        self.detection_result = [faceboxes, confidences]
        return confidences, faceboxes

    def draw_all_result(self, image):
        """Draw the cached detection result on `image` (modified in place)."""
        # BUG FIX: detection_result is [faceboxes, confidences] — iterating it
        # directly yields the two *lists*, not (box, confidence) pairs, so the
        # original loop unpacked the wrong things. Zip them instead.
        faceboxes, confidences = self.detection_result
        for facebox, conf in zip(faceboxes, confidences):
            cv2.rectangle(image, (facebox[0], facebox[1]),
                          (facebox[2], facebox[3]), (0, 255, 0))
            label = "face: %.4f" % conf
            label_size, base_line = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            # Filled background rectangle so the label text stays readable.
            cv2.rectangle(image, (facebox[0], facebox[1] - label_size[1]),
                          (facebox[0] + label_size[0],
                           facebox[1] + base_line),
                          (0, 255, 0), cv2.FILLED)
            cv2.putText(image, label, (facebox[0], facebox[1]),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
class MarkDetector:
    """Facial landmark detector backed by a Convolutional Neural Network."""

    def __init__(self, saved_model='models/pose_model'):
        """Load the landmark model and build the helper face detector."""
        # Landmark detection runs on face crops, so a face detector is needed.
        self.face_detector = FaceDetector()
        self.cnn_input_size = 128
        self.marks = None
        # Restore the trained keras model from the saved_model directory.
        self.model = keras.models.load_model(saved_model)

    @staticmethod
    def draw_box(image, boxes, box_color=(255, 255, 255)):
        """Outline every box in `boxes` on `image`."""
        for left, top, right, bottom in boxes:
            cv2.rectangle(image, (left, top), (right, bottom), box_color, 3)

    @staticmethod
    def move_box(box, offset):
        """Translate `box` by the (dx, dy) vector `offset`."""
        dx, dy = offset
        return [box[0] + dx, box[1] + dy, box[2] + dx, box[3] + dy]

    @staticmethod
    def get_square_box(box):
        """Expand `box` into a square with side max(width, height)."""
        left_x, top_y, right_x, bottom_y = box
        width = right_x - left_x
        height = bottom_y - top_y
        diff = height - width
        if diff == 0:
            # Already square — return it untouched.
            return box
        pad = abs(diff) // 2
        if diff > 0:
            # Taller than wide: grow horizontally; odd remainder goes right.
            left_x -= pad
            right_x += pad + (diff % 2)
        else:
            # Wider than tall: grow vertically; odd remainder goes down.
            top_y -= pad
            bottom_y += pad + (diff % 2)
        assert (right_x - left_x) == (bottom_y - top_y), 'Box is not square.'
        return [left_x, top_y, right_x, bottom_y]

    @staticmethod
    def box_in_image(box, image):
        """Return True if `box` lies entirely within the bounds of `image`."""
        height, width = image.shape[0], image.shape[1]
        return (box[0] >= 0 and box[1] >= 0
                and box[2] <= width and box[3] <= height)

    def extract_cnn_facebox(self, image):
        """Detect faces and return square crop boxes that fit inside `image`."""
        _, raw_boxes = self.face_detector.get_faceboxes(
            image=image, threshold=0.5)
        usable_boxes = []
        for box in raw_boxes:
            # Shift the detection down by 10% of its height (landmarks sit
            # lower than the raw detection), then square it up.
            offset_y = int(abs((box[3] - box[1]) * 0.1))
            squared = self.get_square_box(self.move_box(box, [0, offset_y]))
            if self.box_in_image(squared, image):
                usable_boxes.append(squared)
        return usable_boxes

    def detect_marks(self, image_np):
        """Run the CNN on `image_np` and return the 68 (x, y) landmarks."""
        predictions = self.model.signatures["predict"](
            tf.constant(image_np, dtype=tf.uint8))
        # The first 136 outputs are the 68 landmark coordinate pairs.
        flat = np.array(predictions['output']).flatten()[:136]
        return np.reshape(flat, (-1, 2))

    @staticmethod
    def draw_marks(image, marks, color=(255, 255, 255)):
        """Draw each landmark as a small filled circle on `image`."""
        for x, y in marks:
            cv2.circle(image, (int(x), int(y)), 2, color, -1, cv2.LINE_AA)
def draw_annotation_box(img, rotation_vector, translation_vector, camera_matrix,
                        color=(255, 255, 0), line_width=2):
    """Project a 3D head-pose annotation box and return two reference points.

    Builds a small square at the face plane and a large square (scaled to the
    image width) far in front of it, projects all corners to 2D, and returns
    two points that define a line roughly perpendicular to the face plane.
    The actual polyline drawing is intentionally disabled — callers only use
    the returned points.

    Args:
        img: frame being annotated; only its width is read, to size the box.
        rotation_vector, translation_vector: head pose from cv2.solvePnP.
        camera_matrix: 3x3 camera intrinsics matrix.
        color, line_width: unused here; kept for interface compatibility.

    Returns:
        (corner, midpoint): corner is the projected front-top corner
        point_2d[2]; midpoint is the average of projected corners 5 and 8.
    """
    dist_coeffs = np.zeros((4, 1))  # assume no lens distortion

    # Rear face: unit square at depth 0; front face: image-width-sized square
    # at depth 2*width. Each ring repeats its first corner to close the loop.
    rear_size = 1
    rear_depth = 0
    front_size = img.shape[1]
    front_depth = front_size * 2
    point_3d = [
        (-rear_size, -rear_size, rear_depth),
        (-rear_size, rear_size, rear_depth),
        (rear_size, rear_size, rear_depth),
        (rear_size, -rear_size, rear_depth),
        (-rear_size, -rear_size, rear_depth),
        (-front_size, -front_size, front_depth),
        (-front_size, front_size, front_depth),
        (front_size, front_size, front_depth),
        (front_size, -front_size, front_depth),
        (-front_size, -front_size, front_depth),
    ]
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24,
    # so dtype=np.float crashes on modern NumPy. The builtin float is the
    # exact equivalent (float64).
    point_3d = np.array(point_3d, dtype=float).reshape(-1, 3)

    # Map the 3D corners onto the image plane using the estimated pose.
    (point_2d, _) = cv2.projectPoints(point_3d,
                                      rotation_vector,
                                      translation_vector,
                                      camera_matrix,
                                      dist_coeffs)
    point_2d = np.int32(point_2d.reshape(-1, 2))

    # Midpoint of two front-face corners; paired with point_2d[2] it yields
    # a line approximately perpendicular to the face plane.
    k = (point_2d[5] + point_2d[8]) // 2
    return point_2d[2], k
# --- Main script: live webcam head-pose estimation demo ---------------------
mark_detector = MarkDetector()

# Grab one frame up front to learn the frame size for the camera matrix.
cap = cv2.VideoCapture(0)
ret, img = cap.read()
size = img.shape
font = cv2.FONT_HERSHEY_SIMPLEX

# Generic 3D face model points matching the 68-landmark indices used below.
model_points = np.array([
    (0.0, 0.0, 0.0),           # Nose tip
    (0.0, -330.0, -65.0),      # Chin
    (-225.0, 170.0, -135.0),   # Left eye left corner
    (225.0, 170.0, -135.0),    # Right eye right corner
    (-150.0, -150.0, -125.0),  # Left mouth corner
    (150.0, -150.0, -125.0)    # Right mouth corner
])

# Approximate camera intrinsics: focal length ~ frame width, principal point
# at the image center, no skew.
focal_length = size[1]
center = (size[1] / 2, size[0] / 2)
camera_matrix = np.array(
    [[focal_length, 0, center[0]],
     [0, focal_length, center[1]],
     [0, 0, 1]], dtype="double"
)

while True:
    ret, img = cap.read()
    if not ret:  # was `if ret == True: ... else: break` — guard clause instead
        break
    faceboxes = mark_detector.extract_cnn_facebox(img)
    for facebox in faceboxes:
        # Crop the face, resize to the CNN input size, convert to RGB.
        face_img = img[facebox[1]: facebox[3],
                       facebox[0]: facebox[2]]
        face_img = cv2.resize(face_img, (128, 128))
        face_img = cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)
        marks = mark_detector.detect_marks([face_img])
        # Landmarks are normalized to the crop; map back to frame pixels.
        marks *= (facebox[2] - facebox[0])
        marks[:, 0] += facebox[0]
        marks[:, 1] += facebox[1]
        shape = marks.astype(np.uint)

        # 2D points corresponding to model_points (68-landmark indexing).
        image_points = np.array([
            shape[30],  # Nose tip
            shape[8],   # Chin
            shape[36],  # Left eye left corner
            shape[45],  # Right eye right corner
            shape[48],  # Left mouth corner
            shape[54]   # Right mouth corner
        ], dtype="double")

        dist_coeffs = np.zeros((4, 1))  # Assuming no lens distortion
        (success, rotation_vector, translation_vector) = cv2.solvePnP(
            model_points, image_points, camera_matrix, dist_coeffs,
            flags=cv2.SOLVEPNP_UPNP)

        # Project a point 1000 units in front of the nose; used to draw a
        # direction line sticking out of the nose tip.
        (nose_end_point2D, jacobian) = cv2.projectPoints(
            np.array([(0.0, 0.0, 1000.0)]), rotation_vector,
            translation_vector, camera_matrix, dist_coeffs)

        for p in image_points:
            cv2.circle(img, (int(p[0]), int(p[1])), 3, (0, 0, 255), -1)

        p1 = (int(image_points[0][0]), int(image_points[0][1]))
        p2 = (int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]))
        x1, x2 = draw_annotation_box(img, rotation_vector,
                                     translation_vector, camera_matrix)
        cv2.line(img, p1, p2, (0, 255, 255), 2)
        cv2.line(img, tuple(x1), tuple(x2), (255, 255, 0), 2)

        # Vertical head angle from the nose direction line. A vertical line
        # has infinite slope, which shows up as a division by zero.
        # BUG FIX: the original bare `except:` swallowed *every* exception
        # (including KeyboardInterrupt and genuine bugs); only the
        # ZeroDivisionError is expected here.
        try:
            m = (p2[1] - p1[1]) / (p2[0] - p1[0])
            ang1 = int(math.degrees(math.atan(m)))
        except ZeroDivisionError:
            ang1 = 90

        # Horizontal head angle from the annotation-box line (perpendicular
        # slope -1/m); m == 0 likewise means a 90-degree angle.
        try:
            m = (x2[1] - x1[1]) / (x2[0] - x1[0])
            ang2 = int(math.degrees(math.atan(-1 / m)))
        except ZeroDivisionError:
            ang2 = 90

        cv2.putText(img, str(ang1), tuple(p1), font, 2, (128, 255, 255), 3)
        cv2.putText(img, str(ang2), tuple(x1), font, 2, (255, 255, 128), 3)

    cv2.imshow('img', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()
cap.release()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment