#include <cmath>

#include "mediapipe/framework/calculator_framework.h"
#include "mediapipe/framework/formats/landmark.pb.h"
#include "mediapipe/framework/formats/rect.pb.h"

namespace mediapipe
{

namespace
{
constexpr char normRectTag[] = "NORM_RECT";
constexpr char normalizedLandmarkListTag[] = "NORM_LANDMARKS";
} // namespace

// Graph config:
//
// node {
//   calculator: "HandGestureRecognitionCalculator"
//   input_stream: "NORM_LANDMARKS:scaled_landmarks"
//   input_stream: "NORM_RECT:hand_rect_for_next_frame"
// }
class HandGestureRecognitionCalculator : public CalculatorBase
{
public:
    static ::mediapipe::Status GetContract(CalculatorContract *cc);
    ::mediapipe::Status Open(CalculatorContext *cc) override;

    ::mediapipe::Status Process(CalculatorContext *cc) override;

private:
    // Euclidean distance between two points in normalized [0, 1] coordinates.
    float get_Euclidean_DistanceAB(float a_x, float a_y, float b_x, float b_y)
    {
        float dist = std::pow(a_x - b_x, 2) + std::pow(a_y - b_y, 2);
        return std::sqrt(dist);
    }

    // True when the thumb tip and the index finger tip are close together.
    bool isThumbNearFirstFinger(const NormalizedLandmark &point1, const NormalizedLandmark &point2)
    {
        float distance = this->get_Euclidean_DistanceAB(point1.x(), point1.y(), point2.x(), point2.y());
        return distance < 0.1;
    }
};
REGISTER_CALCULATOR(HandGestureRecognitionCalculator);

::mediapipe::Status HandGestureRecognitionCalculator::GetContract(
    CalculatorContract *cc)
{
    RET_CHECK(cc->Inputs().HasTag(normalizedLandmarkListTag));
    cc->Inputs().Tag(normalizedLandmarkListTag).Set<mediapipe::NormalizedLandmarkList>();

    RET_CHECK(cc->Inputs().HasTag(normRectTag));
    cc->Inputs().Tag(normRectTag).Set<NormalizedRect>();

    return ::mediapipe::OkStatus();
}

::mediapipe::Status HandGestureRecognitionCalculator::Open(
    CalculatorContext *cc)
{
    cc->SetOffset(TimestampDiff(0));
    return ::mediapipe::OkStatus();
}
::mediapipe::Status HandGestureRecognitionCalculator::Process(
    CalculatorContext *cc)
{
    // hand closed (red) rectangle
    const auto rect = &(cc->Inputs().Tag(normRectTag).Get<NormalizedRect>());
    float width = rect->width();
    float height = rect->height();

    if (width < 0.01 || height < 0.01)
    {
        LOG(INFO) << "No Hand Detected";
        return ::mediapipe::OkStatus();
    }

    const auto &landmarkList = cc->Inputs()
                                   .Tag(normalizedLandmarkListTag)
                                   .Get<mediapipe::NormalizedLandmarkList>();
    RET_CHECK_GT(landmarkList.landmark_size(), 0) << "Input landmark vector is empty.";
    // finger states (MediaPipe hand landmark indices: 1-4 thumb, 5-8 index,
    // 9-12 middle, 13-16 ring, 17-20 pinky; smaller y is higher in the image)
    bool thumbIsOpen = false;
    bool firstFingerIsOpen = false;
    bool secondFingerIsOpen = false;
    bool thirdFingerIsOpen = false;
    bool fourthFingerIsOpen = false;

    // Thumb: tip (4) and IP joint (3) to the left of the MCP joint (2) in image x.
    float pseudoFixKeyPoint = landmarkList.landmark(2).x();
    if (landmarkList.landmark(3).x() < pseudoFixKeyPoint && landmarkList.landmark(4).x() < pseudoFixKeyPoint)
    {
        thumbIsOpen = true;
    }

    // Index finger: tip (8) and DIP joint (7) above the PIP joint (6).
    pseudoFixKeyPoint = landmarkList.landmark(6).y();
    if (landmarkList.landmark(7).y() < pseudoFixKeyPoint && landmarkList.landmark(8).y() < pseudoFixKeyPoint)
    {
        firstFingerIsOpen = true;
    }

    // Middle finger: tip (12) and DIP joint (11) above the PIP joint (10).
    pseudoFixKeyPoint = landmarkList.landmark(10).y();
    if (landmarkList.landmark(11).y() < pseudoFixKeyPoint && landmarkList.landmark(12).y() < pseudoFixKeyPoint)
    {
        secondFingerIsOpen = true;
    }

    // Ring finger: tip (16) and DIP joint (15) above the PIP joint (14).
    pseudoFixKeyPoint = landmarkList.landmark(14).y();
    if (landmarkList.landmark(15).y() < pseudoFixKeyPoint && landmarkList.landmark(16).y() < pseudoFixKeyPoint)
    {
        thirdFingerIsOpen = true;
    }

    // Pinky: tip (20) and DIP joint (19) above the PIP joint (18).
    pseudoFixKeyPoint = landmarkList.landmark(18).y();
    if (landmarkList.landmark(19).y() < pseudoFixKeyPoint && landmarkList.landmark(20).y() < pseudoFixKeyPoint)
    {
        fourthFingerIsOpen = true;
    }
    // Hand gesture recognition
    if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "FIVE!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "FOUR!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "THREE!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "TWO!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "ONE!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "YEAH!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "ROCK!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "SPIDERMAN!";
    }
    else if (!thumbIsOpen && !firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "FIST!";
    }
    else if (!firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen && this->isThumbNearFirstFinger(landmarkList.landmark(4), landmarkList.landmark(8)))
    {
        LOG(INFO) << "OK!";
    }
    else
    {
        LOG(INFO) << "Finger States: " << thumbIsOpen << firstFingerIsOpen << secondFingerIsOpen << thirdFingerIsOpen << fourthFingerIsOpen;
        LOG(INFO) << "___";
    }
    return ::mediapipe::OkStatus();
}

} // namespace mediapipe

@TheJLifeX, why does it show the wrong gesture when the hand is flipped? For example, it recognizes ROCK correctly when the palm is facing the camera, but when the hand is flipped (palm not facing the camera) the recognition is incorrect. Could you also please suggest how to recognize thumbs up and thumbs down?
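
Not the author, but one likely cause: the thumb check above compares raw x coordinates (landmarks 3 and 4 against 2), so its meaning inverts when the hand is mirrored or the palm faces away, and the other fingers compare raw y coordinates, which breaks when the hand is rotated. Below is a rough, orientation-tolerant sketch, not a drop-in fix. It assumes MediaPipe's 21-point hand landmark ordering (0 = wrist, 2/4 = thumb MCP/tip, 17 = pinky MCP); the helper names Distance2D, IsThumbOpenByDistance, and DetectThumbUpDown are made up for illustration and are not part of this gist. The helpers would sit next to the existing ones inside namespace mediapipe, and <cmath> is already included for std::hypot.

// Sketch only. Assumes the 21-landmark hand model described above.
static float Distance2D(const NormalizedLandmark &a, const NormalizedLandmark &b)
{
    return std::hypot(a.x() - b.x(), a.y() - b.y());
}

// Orientation-tolerant thumb check: instead of comparing raw x values,
// treat the thumb as open when its tip (4) is farther from the pinky-side
// knuckle (17) than the thumb MCP (2) is. This does not depend on whether
// the palm or the back of the hand faces the camera.
static bool IsThumbOpenByDistance(const NormalizedLandmarkList &lm)
{
    const auto &pinkyMcp = lm.landmark(17);
    return Distance2D(lm.landmark(4), pinkyMcp) >
           Distance2D(lm.landmark(2), pinkyMcp);
}

// Rough thumbs-up / thumbs-down rule: the four fingers are closed, the thumb
// is open, and the thumb tip (4) is clearly above or below the wrist (0) in
// normalized image coordinates (smaller y = higher on screen).
// Returns +1 for thumbs up, -1 for thumbs down, 0 otherwise.
static int DetectThumbUpDown(const NormalizedLandmarkList &lm,
                             bool fingersClosed, bool thumbOpen)
{
    if (!fingersClosed || !thumbOpen)
    {
        return 0;
    }
    const float wristY = lm.landmark(0).y();
    const float thumbTipY = lm.landmark(4).y();
    const float margin = 0.05f; // tolerance in normalized units, tune as needed
    if (thumbTipY < wristY - margin)
    {
        return +1; // thumbs up
    }
    if (thumbTipY > wristY + margin)
    {
        return -1; // thumbs down
    }
    return 0;
}

In Process, something like DetectThumbUpDown(landmarkList, !firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen, thumbIsOpen) could then be logged alongside the existing gestures, but the 0.05 margin and the distance test are only starting points and would need tuning against real landmark data.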