@tejastank, forked from hizkifw/video_facial_landmarks.py, created March 31, 2021
A homemade virtual avatar in two parts: a Python script that tracks dlib's 68 facial landmarks from a webcam, smooths them over a short history, and streams them as JSON over UDP, plus a Processing sketch (using the hypermedia.net UDP library) that receives the landmarks and renders a head-tracked 3D avatar.
# USAGE
# wget https://github.com/AKSHAYUBHAT/TensorFace/raw/master/openface/models/dlib/shape_predictor_68_face_landmarks.dat
# python3 video_facial_landmarks.py
# import the necessary packages
from socket import *
from imutils.video import VideoStream
from imutils import face_utils
from scipy import signal
import datetime
import argparse
import imutils
import time
import dlib
import cv2
import json
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--shape-predictor", default="shape_predictor_68_face_landmarks.dat",
    help="path to facial landmark predictor")
ap.add_argument("-r", "--picamera", type=int, default=-1,
    help="whether or not the Raspberry Pi camera should be used")
args = vars(ap.parse_args())
# initialize dlib's face detector (HOG-based) and then create
# the facial landmark predictor
print("[INFO] loading facial landmark predictor...")
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(args["shape_predictor"])
# initialize the video stream and allow the camera sensor to warm up
print("[INFO] camera sensor warming up...")
vs = VideoStream(usePiCamera=args["picamera"] > 0).start()
time.sleep(2.0)
shape_history = []  # rolling buffer of recent landmark sets, used for smoothing
history_size = 15
last_rects = None  # last successful face detection, reused on dropped frames
cs = socket(AF_INET, SOCK_DGRAM)
cs.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
frame_no = 0
# loop over the frames from the video stream
while True:
    frame_no += 1

    # grab the frame from the threaded video stream, resize it to
    # have a maximum width of 400 pixels, mirror it, and convert it
    # to grayscale
    frame = vs.read()
    frame = imutils.resize(frame, width=400)
    frame = cv2.flip(frame, 1)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # detect faces in the grayscale frame
    rects = detector(gray, 0)

    # upscale the display frame; landmark coordinates stay in the
    # 400px space and are multiplied by scale_factor only for drawing
    scale_factor = 3
    frame = imutils.resize(frame, width=400 * scale_factor)

    # if detection fails on this frame, reuse the last known face
    if len(rects) == 0 and last_rects is not None:
        rects = last_rects
    else:
        last_rects = rects

    # loop over the face detections
    for rect in rects:
        # determine the facial landmarks for the face region, then
        # convert the facial landmark (x, y)-coordinates to a NumPy
        # array
        shape_raw = predictor(gray, rect)
        shape_raw = face_utils.shape_to_np(shape_raw)

        # keep a rolling window of the most recent landmark sets
        shape_history.append(shape_raw)
        if len(shape_history) > history_size:
            shape_history.pop(0)

        # smooth each landmark by folding the history, oldest first,
        # into a running weighted average (2 parts accumulated
        # estimate to 1 part new sample)
        #shape_filtered = signal.savgol_filter(shape_history[0], 5, 2, axis=1)
        shape_filtered = [None] * 68
        for i in range(68):
            shape_filtered[i] = shape_history[0][i]
        for snapshot in shape_history:
            for idx, shape in enumerate(snapshot):
                shape_filtered[idx] = [
                    (2 * shape_filtered[idx][0] + shape[0]) / 3,
                    (2 * shape_filtered[idx][1] + shape[1]) / 3
                ]
        for idx, shape in enumerate(shape_filtered):
            shape_filtered[idx] = [int(shape[0]), int(shape[1])]

        # loop over the (x, y)-coordinates for the facial landmarks
        # and draw them on the image
        """
        for idx, (x, y) in enumerate(shape_filtered):
            cv2.circle(frame, (x*scale_factor, y*scale_factor), 1, (0, 0, 255), -1)
            cv2.putText(frame, str(idx), (x*scale_factor, y*scale_factor),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
        """

        # landmark index pairs (dlib 68-point model) drawn as guide lines
        lines = [
            (36, 45),  # horizontal
            (27, 8),   # vertical
            (36, 8),   # left eye to chin
            (45, 8),   # right eye to chin
            (36, 30),  # left eye to nose tip
            (45, 30),  # right eye to nose tip
            (27, 30),  # nose bridge
            (30, 8),   # nose tip to chin
            (31, 35),  # nose bottom
            (31, 30),  # nose bottom left to tip
            (35, 30),  # nose bottom right to tip
            (65, 67),  # bottom inner lip
            (61, 63),  # top inner lip
        ]
        for line in lines:
            cv2.line(frame,
                (shape_filtered[line[0]][0] * scale_factor, shape_filtered[line[0]][1] * scale_factor),
                (shape_filtered[line[1]][0] * scale_factor, shape_filtered[line[1]][1] * scale_factor),
                (0, 255, 0), 1)

        # stream the smoothed landmarks as JSON over UDP
        cs.sendto(json.dumps(shape_filtered).encode(), ("127.0.0.1", 50000))

    # show the frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break
# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
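
The tracker above emits one JSON array of 68 smoothed [x, y] landmark pairs per processed frame to UDP port 50000 on localhost; the Processing sketch below consumes that stream. To inspect the stream on its own first, a minimal listener along these lines can be used (a test sketch, not part of the original gist):

# udp_listener_check.py - hypothetical test helper, not part of the gist:
# prints landmark packets sent by video_facial_landmarks.py
import json
from socket import socket, AF_INET, SOCK_DGRAM

sock = socket(AF_INET, SOCK_DGRAM)
sock.bind(("127.0.0.1", 50000))  # the address the tracker sends to

while True:
    data, _ = sock.recvfrom(65536)
    landmarks = json.loads(data.decode())
    # 68 [x, y] pairs; index 30 is the nose tip in dlib's 68-point model
    print("points:", len(landmarks), "nose tip:", landmarks[30])

Since this listener and the Processing sketch both bind port 50000, run only one of them at a time.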
import hypermedia.net.*;

int PORT_RX = 50000;
String HOST_IP = "127.0.0.1"; // IP address of the machine this sketch runs on

UDP udp; // UDP object for receiving landmark packets

// one captured frame of 68 [x, y] landmarks, used until the first packet arrives
String lastJSON = "[[143, 147], [142, 157], [144, 167], [146, 178], [149, 188], [153, 197], [159, 205], [166, 211], [176, 214], [188, 213], [200, 209], [211, 204], [221, 197], [227, 187], [231, 175], [233, 162], [234, 149], [147, 136], [151, 131], [158, 129], [166, 130], [172, 133], [183, 134], [192, 131], [201, 130], [210, 133], [217, 138], [176, 145], [175, 151], [174, 157], [173, 163], [168, 171], [171, 173], [175, 174], [180, 173], [185, 172], [155, 147], [158, 145], [163, 145], [168, 147], [163, 148], [158, 148], [191, 148], [196, 146], [201, 146], [207, 148], [201, 150], [196, 149], [163, 189], [167, 184], [171, 181], [175, 182], [179, 181], [185, 184], [192, 189], [185, 193], [179, 194], [175, 195], [171, 194], [167, 193], [166, 188], [171, 186], [175, 186], [179, 186], [189, 189], [179, 188], [175, 189], [171, 188]]";
// distance from point (px, py) to the infinite line through (lx1, ly1) and (lx2, ly2)
float pointLineDistance(float lx1, float ly1, float lx2, float ly2, float px, float py) {
  return
    abs(((ly2 - ly1) * px) - ((lx2 - lx1) * py) + (lx2 * ly1) - (ly2 * lx1)) /
    sqrt(pow(ly2 - ly1, 2) + pow(lx2 - lx1, 2));
}

// Euclidean distance between two points
float pointDistance(float px1, float py1, float px2, float py2) {
  return
    sqrt(
      pow(px2 - px1, 2) +
      pow(py2 - py1, 2)
    );
}
float rotationX, rotationY, rotationZ, midpointX, midpointY, mouthDistance, headSize;
float lerpAmount = 0.35; // smoothing factor for all lerped pose values

void setup() {
  size(1280, 720, P3D); // size() must come first in Processing 3+
  surface.setTitle("Homemade Virtual Avatar"); // frame.setTitle() on Processing 2.x
  udp = new UDP(this, PORT_RX, HOST_IP);
  udp.log(true);
  udp.listen(true);
  background(0);
  lights();
  stroke(255);
}
void draw() {
  clear();
  background(0, 255, 0); // solid green backdrop

  try {
    JSONArray values = parseJSONArray(lastJSON);
    JSONArray pEyeLeft = values.getJSONArray(36);
    JSONArray pEyeRight = values.getJSONArray(45);
    JSONArray pNoseTip = values.getJSONArray(30);
    JSONArray pNoseTop = values.getJSONArray(27);
    JSONArray pChin = values.getJSONArray(8);
    JSONArray pMouthTop = values.getJSONArray(62);
    JSONArray pMouthBottom = values.getJSONArray(66);

    // Head position: centroid of the eye corners and chin, mapped from the
    // tracker's 400x300 frame into the 1280x720 canvas
    midpointX = lerp(midpointX, (pEyeLeft.getInt(0) + pEyeRight.getInt(0) + pChin.getInt(0)) / 3 * 1280 / 400, lerpAmount);
    midpointY = lerp(midpointY, (pEyeLeft.getInt(1) + pEyeRight.getInt(1) + pChin.getInt(1)) / 3 * 720 / 300, lerpAmount);

    // Pitch (the "yes" nod): distance of the nose bridge top from the
    // line between the outer eye corners
    rotationX = lerp(rotationX, pointLineDistance(
      pEyeLeft.getInt(0), pEyeLeft.getInt(1),
      pEyeRight.getInt(0), pEyeRight.getInt(1),
      pNoseTop.getInt(0), pNoseTop.getInt(1)
    ) / 500 * 8 * PI, lerpAmount);

    // Yaw (the "no" shake): distance of the nose tip from the bridge-to-chin
    // line, signed by which side of the face midline the tip falls on
    rotationY = lerp(rotationY, pointLineDistance(
      pNoseTop.getInt(0), pNoseTop.getInt(1),
      pChin.getInt(0), pChin.getInt(1),
      pNoseTip.getInt(0), pNoseTip.getInt(1)
    ) / 500 * 8 * PI * (pNoseTip.getInt(0) > (pNoseTop.getInt(0) + pChin.getInt(0)) / 2 ? 1 : -1), lerpAmount);

    // Roll (head tilt): angle of the line between the outer eye corners
    rotationZ = lerp(rotationZ, atan2(
      pEyeRight.getInt(1) - pEyeLeft.getInt(1),
      pEyeRight.getInt(0) - pEyeLeft.getInt(0)
    ), lerpAmount);

    // Mouth openness: inner-lip gap with a 5px dead zone, clamped and
    // normalized to 0.0 - 1.0
    mouthDistance = lerp(mouthDistance,
      max(0, min(5, pointDistance(
        pMouthTop.getInt(0), pMouthTop.getInt(1),
        pMouthBottom.getInt(0), pMouthBottom.getInt(1)
      ) - 5)) / 5.0, lerpAmount);

    // Head size: inter-eye distance, approx. 30.0 to 80.0
    headSize = lerp(headSize,
      pointDistance(
        pEyeLeft.getInt(0), pEyeLeft.getInt(1),
        pEyeRight.getInt(0), pEyeRight.getInt(1)
      ), lerpAmount);
    System.out.println(headSize);

    pushMatrix();
    {
      // NOTE: headSize causes a lot of jitter
      // translate(midpointX, midpointY);
      translate(midpointX, midpointY, headSize * 5);
      rotateX(rotationX);
      rotateY(rotationY);
      rotateZ(rotationZ);

      // head: fill/stroke brightness follow mouth openness
      fill(255 * mouthDistance);
      stroke(255 * (1 - mouthDistance));
      box(100);

      // eyes
      fill(255);
      stroke(0);
      translate(-15, -15, 50);
      box(10);
      translate(30, 0, 0);
      box(10);
    }
    popMatrix();
  }
  catch (Exception e) {
    e.printStackTrace();
  }
}
// hypermedia.net UDP callback: keep the most recent landmark packet
void receive(byte[] data, String ip, int port) {
  //System.out.println(new String(data).trim());
  lastJSON = new String(data).trim();
}
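
The pitch and yaw proxies above both lean on pointLineDistance, which is the standard point-to-line distance formula. A quick cross-check of the same expression in Python (illustrative only, not part of the gist):

# point_line_check.py - illustrative cross-check (not in the gist) of the
# formula used by pointLineDistance() in the sketch above
import math

def point_line_distance(lx1, ly1, lx2, ly2, px, py):
    # distance from (px, py) to the infinite line through (lx1, ly1)-(lx2, ly2)
    num = abs((ly2 - ly1) * px - (lx2 - lx1) * py + lx2 * ly1 - ly2 * lx1)
    return num / math.hypot(lx2 - lx1, ly2 - ly1)

# (3, 4) lies 4 units from the x-axis (the line through (0, 0) and (1, 0))
assert point_line_distance(0, 0, 1, 0, 3, 4) == 4.0
# and 3 units from the y-axis (the line through (0, 0) and (0, 1))
assert point_line_distance(0, 0, 0, 1, 3, 4) == 3.0

Divided by a scale constant, that same distance is what drives rotationX and rotationY in draw().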