TheJLifeX/00-hand-gesture-recognition.gif

## 00-hand-gesture-recognition.gif

      
    Raw
  

              00-hand-gesture-recognition.gif
            
          
## 01-README.md

      
    Raw
  

              01-README.md
            
          
    Simple Hand Gesture Recognition Code - Hand tracking - Mediapipe

Goal of this gist is to recognize ONE, TWO, TREE, FOUR, FIVE, SIX, YEAH, ROCK, SPIDERMAN and OK.
We use the LANDMARKS output of the LandmarkLetterboxRemovalCalculator. This output is a landmark list that contains 21 landmark.
In the 02-landmarks.jpg picture below you can see the index of each landmark.
Each landmark have  x, y and z values. But only x, y values are sufficient for our Goal.
If you dont want to copy/paste each the code on this gist, you can clone my forked version of mediapipe here: https://github.com/TheJLifeX/mediapipe.
I have already commited all code in that repository.
We have five finger states.

thumbIsOpen
firstFingerIsOpen
secondFingerIsOpen
thirdFingerIsOpen
fourthFingerIsOpen

For exmaple: thumb is open if the x value of landmark 3 and the x value of landmark 4 are less than x value of landmark 2 else it is close
PS: thumb open/close works only for the right hand.
Because we can not yet determine if you show your left or right hand. For more info see this issue: Can palm_detection distinguish between right and left hand?
Prerequisite:
You kwon how to run the hand tracking example.

Get Started with mediapipe
Hand Tracking on Desktop

If you want to know how to recognize some simple hand mouvements like Scrolling, Zoom in/out and Slide left/right (see comment below) you can read this gist: Simple Hand Mouvement Recognition Code.

  
## 02-landmarks.jpg

      
    Raw
  

              02-landmarks.jpg
            
          
## 03-hand-gesture-recognition-calculator.cc
#include <cmath>
#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"

namespace mediapipe
{

namespace
{
constexpr char normRectTag[] = "NORM_RECT";
constexpr char normalizedLandmarkListTag[] = "NORM_LANDMARKS";
} // namespace

// Graph config:
//
// node {
//   calculator: "HandGestureRecognitionCalculator"
//   input_stream: "NORM_LANDMARKS:scaled_landmarks"
//   input_stream: "NORM_RECT:hand_rect_for_next_frame"
// }
class HandGestureRecognitionCalculator : public CalculatorBase
{
public:
    static ::mediapipe::Status GetContract(CalculatorContract *cc);
    ::mediapipe::Status Open(CalculatorContext *cc) override;

    ::mediapipe::Status Process(CalculatorContext *cc) override;

private:
    float get_Euclidean_DistanceAB(float a_x, float a_y, float b_x, float b_y)
    {
        float dist = std::pow(a_x - b_x, 2) + pow(a_y - b_y, 2);
        return std::sqrt(dist);
    }

    bool isThumbNearFirstFinger(NormalizedLandmark point1, NormalizedLandmark point2)
    {
        float distance = this->get_Euclidean_DistanceAB(point1.x(), point1.y(), point2.x(), point2.y());
        return distance < 0.1;
    }
};

REGISTER_CALCULATOR(HandGestureRecognitionCalculator);

::mediapipe::Status HandGestureRecognitionCalculator::GetContract(
    CalculatorContract *cc)
{
    RET_CHECK(cc->Inputs().HasTag(normalizedLandmarkListTag));
    cc->Inputs().Tag(normalizedLandmarkListTag).Set<mediapipe::NormalizedLandmarkList>();

    RET_CHECK(cc->Inputs().HasTag(normRectTag));
    cc->Inputs().Tag(normRectTag).Set<NormalizedRect>();

    return ::mediapipe::OkStatus();
}

::mediapipe::Status HandGestureRecognitionCalculator::Open(
    CalculatorContext *cc)
{
    cc->SetOffset(TimestampDiff(0));
    return ::mediapipe::OkStatus();
}

::mediapipe::Status HandGestureRecognitionCalculator::Process(
    CalculatorContext *cc)
{
    // hand closed (red) rectangle
    const auto rect = &(cc->Inputs().Tag(normRectTag).Get<NormalizedRect>());
    float width = rect->width();
    float height = rect->height();

    if (width < 0.01 || height < 0.01)
    {
        LOG(INFO) << "No Hand Detected";
        return ::mediapipe::OkStatus();
    }

    const auto &landmarkList = cc->Inputs()
                                   .Tag(normalizedLandmarkListTag)
                                   .Get<mediapipe::NormalizedLandmarkList>();
    RET_CHECK_GT(landmarkList.landmark_size(), 0) << "Input landmark vector is empty.";

    // finger states
    bool thumbIsOpen = false;
    bool firstFingerIsOpen = false;
    bool secondFingerIsOpen = false;
    bool thirdFingerIsOpen = false;
    bool fourthFingerIsOpen = false;
    //

    float pseudoFixKeyPoint = landmarkList.landmark(2).x();
    if (landmarkList.landmark(3).x() < pseudoFixKeyPoint && landmarkList.landmark(4).x() < pseudoFixKeyPoint)
    {
        thumbIsOpen = true;
    }

    pseudoFixKeyPoint = landmarkList.landmark(6).y();
    if (landmarkList.landmark(7).y() < pseudoFixKeyPoint && landmarkList.landmark(8).y() < pseudoFixKeyPoint)
    {
        firstFingerIsOpen = true;
    }

    pseudoFixKeyPoint = landmarkList.landmark(10).y();
    if (landmarkList.landmark(11).y() < pseudoFixKeyPoint && landmarkList.landmark(12).y() < pseudoFixKeyPoint)
    {
        secondFingerIsOpen = true;
    }

    pseudoFixKeyPoint = landmarkList.landmark(14).y();
    if (landmarkList.landmark(15).y() < pseudoFixKeyPoint && landmarkList.landmark(16).y() < pseudoFixKeyPoint)
    {
        thirdFingerIsOpen = true;
    }

    pseudoFixKeyPoint = landmarkList.landmark(18).y();
    if (landmarkList.landmark(19).y() < pseudoFixKeyPoint && landmarkList.landmark(20).y() < pseudoFixKeyPoint)
    {
        fourthFingerIsOpen = true;
    }

    // Hand gesture recognition
    if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "FIVE!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "FOUR!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "TREE!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "TWO!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "ONE!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "YEAH!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "ROCK!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "SPIDERMAN!";
    }
    else if (!thumbIsOpen && !firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "FIST!";
    }
    else if (!firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen && this->isThumbNearFirstFinger(landmarkList.landmark(4), landmarkList.landmark(8)))
    {
        LOG(INFO) << "OK!";
    }
    else
    {
        LOG(INFO) << "Finger States: " << thumbIsOpen << firstFingerIsOpen << secondFingerIsOpen << thirdFingerIsOpen << fourthFingerIsOpen;
        LOG(INFO) << "___";
    }

    return ::mediapipe::OkStatus();
} // namespace mediapipe

} // namespace mediapipe

## 04-README.md

      
    Raw
  

              04-README.md
            
          
    We have to add the HandGestureRecognitionCalculator node config in the in the hand_landmark_cpu.pbtxt or hand_landmark_gpu.pbtxt graph file.

  node {
      calculator: "HandGestureRecognitionCalculator"
      input_stream: "NORM_LANDMARKS:scaled_landmarks"
      input_stream: "NORM_RECT:hand_rect_for_next_frame"
    }
For example:

in the hand_landmark_cpu.pbtx see here: https://github.com/TheJLifeX/mediapipe/blob/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_cpu.pbtxt#L187-L191
in the hand_landmark_gpu.pbtx see here: https://github.com/TheJLifeX/mediapipe/blob/master/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt#L182-L186


## 05-README.md

      
    Raw
  

              05-README.md
            
          
    We have to create a bazel build config for our Calculator.


## 06-BUILD
cc_library(
    name = "hand-gesture-recognition-calculator",
    srcs = ["hand-gesture-recognition-calculator.cc"],
    visibility = ["//visibility:public"],
    deps = [
        "//mediapipe/framework:calculator_framework",
        "//mediapipe/framework/formats:landmark_cc_proto",
        "//mediapipe/framework/port:status",
        "//mediapipe/framework/formats:rect_cc_proto",
        "//mediapipe/framework/port:ret_check",
    ],
    alwayslink = 1,
)

## 07-REAME.md

      
    Raw
  

              07-REAME.md
            
          
    We have to add the path to the "hand-gesture-recognition-calculator" bazel build config in the hand_landmark_cpu or hand_landmark_cpu bazel build config.

For example: "//hand-gesture-recognition:hand-gesture-recognition-calculator"

in the hand_landmark_cpu see here: https://github.com/TheJLifeX/mediapipe/blob/a069e5b6e1097f3f69c161a11f336e9e3b9751dd/mediapipe/graphs/hand_tracking/subgraphs/BUILD#L88
in the hand_landmark_gpu see here: https://github.com/TheJLifeX/mediapipe/blob/a069e5b6e1097f3f69c161a11f336e9e3b9751dd/mediapipe/graphs/hand_tracking/subgraphs/BUILD#L192


## 08-README.md

      
    Raw
  

              08-README.md
            
          
    You can now build the project and run it.
	#include <cmath>
	#include "mediapipe/framework/calculator_framework.h"
	#include "mediapipe/framework/formats/landmark.pb.h"
	#include "mediapipe/framework/formats/rect.pb.h"

	namespace mediapipe
	{

	namespace
	{
	constexpr char normRectTag[] = "NORM_RECT";
	constexpr char normalizedLandmarkListTag[] = "NORM_LANDMARKS";
	} // namespace

	// Graph config:
	//
	// node {
	// calculator: "HandGestureRecognitionCalculator"
	// input_stream: "NORM_LANDMARKS:scaled_landmarks"
	// input_stream: "NORM_RECT:hand_rect_for_next_frame"
	// }
	class HandGestureRecognitionCalculator : public CalculatorBase
	{
	public:
	static ::mediapipe::Status GetContract(CalculatorContract *cc);
	::mediapipe::Status Open(CalculatorContext *cc) override;

	::mediapipe::Status Process(CalculatorContext *cc) override;

	private:
	float get_Euclidean_DistanceAB(float a_x, float a_y, float b_x, float b_y)
	{
	float dist = std::pow(a_x - b_x, 2) + pow(a_y - b_y, 2);
	return std::sqrt(dist);
	}

	bool isThumbNearFirstFinger(NormalizedLandmark point1, NormalizedLandmark point2)
	{
	float distance = this->get_Euclidean_DistanceAB(point1.x(), point1.y(), point2.x(), point2.y());
	return distance < 0.1;
	}
	};

	REGISTER_CALCULATOR(HandGestureRecognitionCalculator);

	::mediapipe::Status HandGestureRecognitionCalculator::GetContract(
	CalculatorContract *cc)
	{
	RET_CHECK(cc->Inputs().HasTag(normalizedLandmarkListTag));
	cc->Inputs().Tag(normalizedLandmarkListTag).Set<mediapipe::NormalizedLandmarkList>();

	RET_CHECK(cc->Inputs().HasTag(normRectTag));
	cc->Inputs().Tag(normRectTag).Set<NormalizedRect>();

	return ::mediapipe::OkStatus();
	}

	::mediapipe::Status HandGestureRecognitionCalculator::Open(
	CalculatorContext *cc)
	{
	cc->SetOffset(TimestampDiff(0));
	return ::mediapipe::OkStatus();
	}

	::mediapipe::Status HandGestureRecognitionCalculator::Process(
	CalculatorContext *cc)
	{
	// hand closed (red) rectangle
	const auto rect = &(cc->Inputs().Tag(normRectTag).Get<NormalizedRect>());
	float width = rect->width();
	float height = rect->height();

	if (width < 0.01 \|\| height < 0.01)
	{
	LOG(INFO) << "No Hand Detected";
	return ::mediapipe::OkStatus();
	}

	const auto &landmarkList = cc->Inputs()
	.Tag(normalizedLandmarkListTag)
	.Get<mediapipe::NormalizedLandmarkList>();
	RET_CHECK_GT(landmarkList.landmark_size(), 0) << "Input landmark vector is empty.";

	// finger states
	bool thumbIsOpen = false;
	bool firstFingerIsOpen = false;
	bool secondFingerIsOpen = false;
	bool thirdFingerIsOpen = false;
	bool fourthFingerIsOpen = false;
	//

	float pseudoFixKeyPoint = landmarkList.landmark(2).x();
	if (landmarkList.landmark(3).x() < pseudoFixKeyPoint && landmarkList.landmark(4).x() < pseudoFixKeyPoint)
	{
	thumbIsOpen = true;
	}

	pseudoFixKeyPoint = landmarkList.landmark(6).y();
	if (landmarkList.landmark(7).y() < pseudoFixKeyPoint && landmarkList.landmark(8).y() < pseudoFixKeyPoint)
	{
	firstFingerIsOpen = true;
	}

	pseudoFixKeyPoint = landmarkList.landmark(10).y();
	if (landmarkList.landmark(11).y() < pseudoFixKeyPoint && landmarkList.landmark(12).y() < pseudoFixKeyPoint)
	{
	secondFingerIsOpen = true;
	}

	pseudoFixKeyPoint = landmarkList.landmark(14).y();
	if (landmarkList.landmark(15).y() < pseudoFixKeyPoint && landmarkList.landmark(16).y() < pseudoFixKeyPoint)
	{
	thirdFingerIsOpen = true;
	}

	pseudoFixKeyPoint = landmarkList.landmark(18).y();
	if (landmarkList.landmark(19).y() < pseudoFixKeyPoint && landmarkList.landmark(20).y() < pseudoFixKeyPoint)
	{
	fourthFingerIsOpen = true;
	}

	// Hand gesture recognition
	if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
	{
	LOG(INFO) << "FIVE!";
	}
	else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
	{
	LOG(INFO) << "FOUR!";
	}
	else if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
	{
	LOG(INFO) << "TREE!";
	}
	else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
	{
	LOG(INFO) << "TWO!";
	}
	else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
	{
	LOG(INFO) << "ONE!";
	}
	else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
	{
	LOG(INFO) << "YEAH!";
	}
	else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
	{
	LOG(INFO) << "ROCK!";
	}
	else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
	{
	LOG(INFO) << "SPIDERMAN!";
	}
	else if (!thumbIsOpen && !firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
	{
	LOG(INFO) << "FIST!";
	}
	else if (!firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen && this->isThumbNearFirstFinger(landmarkList.landmark(4), landmarkList.landmark(8)))
	{
	LOG(INFO) << "OK!";
	}
	else
	{
	LOG(INFO) << "Finger States: " << thumbIsOpen << firstFingerIsOpen << secondFingerIsOpen << thirdFingerIsOpen << fourthFingerIsOpen;
	LOG(INFO) << "___";
	}

	return ::mediapipe::OkStatus();
	} // namespace mediapipe

	} // namespace mediapipe
	cc_library(
	name = "hand-gesture-recognition-calculator",
	srcs = ["hand-gesture-recognition-calculator.cc"],
	visibility = ["//visibility:public"],
	deps = [
	"//mediapipe/framework:calculator_framework",
	"//mediapipe/framework/formats:landmark_cc_proto",
	"//mediapipe/framework/port:status",
	"//mediapipe/framework/formats:rect_cc_proto",
	"//mediapipe/framework/port:ret_check",
	],
	alwayslink = 1,
	)