|
#include <cmath> |
|
#include "mediapipe/framework/calculator_framework.h" |
|
#include "mediapipe/framework/formats/landmark.pb.h" |
|
#include "mediapipe/framework/formats/rect.pb.h" |
|
|
|
namespace mediapipe |
|
{ |
|
|
|
namespace
{

// Stream tags this calculator looks up on its inputs. File-local on purpose:
// the anonymous namespace keeps them out of other translation units.

// Tag of the input stream carrying the landmark list.
constexpr char normalizedLandmarkListTag[] = "NORM_LANDMARKS";

// Tag of the input stream carrying the hand rectangle.
constexpr char normRectTag[] = "NORM_RECT";

} // namespace
|
|
|
// Graph config: |
|
// |
|
// node { |
|
// calculator: "HandGestureRecognitionCalculator" |
|
// input_stream: "NORM_LANDMARKS:scaled_landmarks" |
|
// input_stream: "NORM_RECT:hand_rect_for_next_frame" |
|
// } |
|
class HandGestureRecognitionCalculator : public CalculatorBase |
|
{ |
|
public: |
|
static ::mediapipe::Status GetContract(CalculatorContract *cc); |
|
::mediapipe::Status Open(CalculatorContext *cc) override; |
|
|
|
::mediapipe::Status Process(CalculatorContext *cc) override; |
|
|
|
private: |
|
float get_Euclidean_DistanceAB(float a_x, float a_y, float b_x, float b_y) |
|
{ |
|
float dist = std::pow(a_x - b_x, 2) + pow(a_y - b_y, 2); |
|
return std::sqrt(dist); |
|
} |
|
|
|
bool isThumbNearFirstFinger(NormalizedLandmark point1, NormalizedLandmark point2) |
|
{ |
|
float distance = this->get_Euclidean_DistanceAB(point1.x(), point1.y(), point2.x(), point2.y()); |
|
return distance < 0.1; |
|
} |
|
}; |
|
|
|
REGISTER_CALCULATOR(HandGestureRecognitionCalculator); |
|
|
|
// Validates the calculator's contract: both the landmark-list input and the
// hand-rectangle input must be present, and each is bound to its packet type.
// Returns an error status (via RET_CHECK) if either tag is missing.
::mediapipe::Status HandGestureRecognitionCalculator::GetContract(
    CalculatorContract *cc)
{
    RET_CHECK(cc->Inputs().HasTag(normalizedLandmarkListTag))
        << "Missing required input stream tag: " << normalizedLandmarkListTag;
    cc->Inputs().Tag(normalizedLandmarkListTag).Set<mediapipe::NormalizedLandmarkList>();

    RET_CHECK(cc->Inputs().HasTag(normRectTag))
        << "Missing required input stream tag: " << normRectTag;
    cc->Inputs().Tag(normRectTag).Set<NormalizedRect>();

    return ::mediapipe::OkStatus();
}
|
|
|
// Called once before processing starts. Registers a timestamp offset of zero
// with the framework and reports success; no other state is initialized.
// NOTE(review): this calculator declares no output streams in GetContract, so
// the offset presumably has no observable effect — confirm before removing.
::mediapipe::Status HandGestureRecognitionCalculator::Open(
    CalculatorContext *cc)
{
    cc->SetOffset(TimestampDiff(0));
    return ::mediapipe::OkStatus();
}
|
|
|
// Reads the hand rectangle and landmark list for the current timestamp,
// derives an open/closed state for each of the five fingers, and logs the
// matching gesture label (or the raw finger states when nothing matches).
//
// Returns OkStatus when a gesture was evaluated, when the rectangle is too
// small to contain a hand, or when either input has no packet at this
// timestamp. Returns an error status if the landmark list holds fewer than
// the 21 entries this method indexes.
::mediapipe::Status HandGestureRecognitionCalculator::Process(
    CalculatorContext *cc)
{
    // Landmark indices 0..20 are accessed below, so 21 entries are required.
    constexpr int kRequiredLandmarkCount = 21;

    // Get<>() must not be called on an empty packet; skip this timestamp if
    // either input is missing.
    if (cc->Inputs().Tag(normRectTag).IsEmpty() ||
        cc->Inputs().Tag(normalizedLandmarkListTag).IsEmpty())
    {
        return ::mediapipe::OkStatus();
    }

    // hand closed (red) rectangle
    const auto &rect = cc->Inputs().Tag(normRectTag).Get<NormalizedRect>();
    if (rect.width() < 0.01 || rect.height() < 0.01)
    {
        LOG(INFO) << "No Hand Detected";
        return ::mediapipe::OkStatus();
    }

    const auto &landmarkList = cc->Inputs()
                                   .Tag(normalizedLandmarkListTag)
                                   .Get<mediapipe::NormalizedLandmarkList>();
    RET_CHECK_GE(landmarkList.landmark_size(), kRequiredLandmarkCount)
        << "Expected at least " << kRequiredLandmarkCount
        << " landmarks, got " << landmarkList.landmark_size() << ".";

    // A non-thumb finger counts as open when the two landmarks following the
    // pivot both have a smaller y than the pivot itself.
    // NOTE(review): presumably image coordinates with y growing downward and
    // an upright hand — confirm against the upstream landmark model.
    const auto fingerIsOpen = [&landmarkList](int pivotIndex) {
        const float pivotY = landmarkList.landmark(pivotIndex).y();
        return landmarkList.landmark(pivotIndex + 1).y() < pivotY &&
               landmarkList.landmark(pivotIndex + 2).y() < pivotY;
    };

    // finger states
    // The thumb is judged along x instead of y: landmarks 3 and 4 must both
    // lie at a smaller x than landmark 2.
    const float thumbPivotX = landmarkList.landmark(2).x();
    const bool thumbIsOpen = landmarkList.landmark(3).x() < thumbPivotX &&
                             landmarkList.landmark(4).x() < thumbPivotX;
    const bool firstFingerIsOpen = fingerIsOpen(6);
    const bool secondFingerIsOpen = fingerIsOpen(10);
    const bool thirdFingerIsOpen = fingerIsOpen(14);
    const bool fourthFingerIsOpen = fingerIsOpen(18);

    // Hand gesture recognition
    if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "FIVE!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "FOUR!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        // Log label corrected from the misspelled "TREE!".
        LOG(INFO) << "THREE!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "TWO!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "ONE!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "YEAH!";
    }
    else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "ROCK!";
    }
    else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen)
    {
        LOG(INFO) << "SPIDERMAN!";
    }
    else if (!thumbIsOpen && !firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen)
    {
        LOG(INFO) << "FIST!";
    }
    // "OK" ignores the thumb's open/closed state; it only requires landmark 4
    // to be close to landmark 8 while the first finger is closed.
    else if (!firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen && this->isThumbNearFirstFinger(landmarkList.landmark(4), landmarkList.landmark(8)))
    {
        LOG(INFO) << "OK!";
    }
    else
    {
        // No known gesture: dump the five flags (thumb first) for debugging.
        LOG(INFO) << "Finger States: " << thumbIsOpen << firstFingerIsOpen << secondFingerIsOpen << thirdFingerIsOpen << fourthFingerIsOpen;
        LOG(INFO) << "___";
    }

    return ::mediapipe::OkStatus();
}
|
|
|
} // namespace mediapipe |
It would be very helpful if you could guide me on how to proceed with this MediaPipe hand gesture recognition on Windows. I can't find many resources on how to use MediaPipe on Windows for hand gesture recognition. Also, do you think using the Windows Subsystem for Linux would be better than native Windows for this MediaPipe hand gesture recognition?