Skip to content

Instantly share code, notes, and snippets.

@mgyong
Created November 1, 2019 23:54
Show Gist options
  • Save mgyong/ef387feab3de99bbdbd3bc792a5f4c68 to your computer and use it in GitHub Desktop.
Save mgyong/ef387feab3de99bbdbd3bc792a5f4c68 to your computer and use it in GitHub Desktop.
MediaPipe issue https://github.com/google/mediapipe/issues/202 — location of file: /mediapipe/mediapipe/graphs/hand_tracking/subgraphs/hand_landmark_gpu.pbtxt
# MediaPipe hand landmark localization subgraph.
#
# Given a video frame and a rectangle around a hand, this subgraph crops the
# hand, runs a TFLite landmark model on it, and emits the landmarks, a
# rectangle to use on the following frame, and a hand-presence flag.
type: "HandLandmarkSubgraph"
# Input frame. Consumed inside the subgraph by the cropping and
# image-properties nodes through their IMAGE_GPU inputs.
input_stream: "IMAGE:input_video"
# Rectangle locating the hand in the input frame. Used both for cropping and
# for projecting the landmarks back onto the full frame.
input_stream: "NORM_RECT:hand_rect"
# Landmarks after projection back onto the full input frame (produced by the
# LandmarkProjectionCalculator node).
output_stream: "LANDMARKS:hand_landmarks"
# Enlarged rectangle derived from the landmarks (produced by the
# RectTransformationCalculator node).
output_stream: "NORM_RECT:hand_rect_for_next_frame"
# Thresholded hand-presence flag (produced by the ThresholdingCalculator node).
output_stream: "PRESENCE:hand_presence"
# Cuts the hand region, described by the hand_rect rectangle, out of the
# input frame and emits it as hand_image.
node {
  calculator: "ImageCroppingCalculator"
  input_stream: "IMAGE_GPU:input_video"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "IMAGE_GPU:hand_image"
}
# Resizes the cropped hand image on GPU to 256x256. scale_mode is FIT, which
# keeps the aspect ratio and may therefore letterbox the result; the padding
# that was applied is published on letterbox_padding so it can be undone
# later.
node {
  calculator: "ImageTransformationCalculator"
  input_stream: "IMAGE_GPU:hand_image"
  output_stream: "IMAGE_GPU:transformed_hand_image"
  output_stream: "LETTERBOX_PADDING:letterbox_padding"
  node_options: {
    [type.googleapis.com/mediapipe.ImageTransformationCalculatorOptions] {
      output_width: 256
      output_height: 256
      scale_mode: FIT
    }
  }
}
# Turns the resized GPU image into an image tensor (stored as a TfLiteTensor)
# for the inference node.
# NOTE(review): zero_center is explicitly disabled here; this presumably
# selects the [0, 1] pixel normalization instead of [-1, 1] -- confirm against
# TfLiteConverterCalculatorOptions and the model's expected input range.
node {
  calculator: "TfLiteConverterCalculator"
  input_stream: "IMAGE_GPU:transformed_hand_image"
  output_stream: "TENSORS_GPU:image_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteConverterCalculatorOptions] {
      zero_center: false
    }
  }
}
# Runs the TensorFlow Lite hand landmark model on GPU. It consumes the image
# tensor and emits a vector of tensors, which the following node splits into
# the landmark tensors and the hand-flag tensor.
node {
  calculator: "TfLiteInferenceCalculator"
  input_stream: "TENSORS_GPU:image_tensor"
  output_stream: "TENSORS:output_tensors"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteInferenceCalculatorOptions] {
      model_path: "mediapipe/models/hand_landmark_3d.tflite"
      use_gpu: true
    }
  }
}
# Breaks the model's output tensor vector into two vectors. Two ranges are
# declared (begin 0 / end 1 and begin 1 / end 2), listed in the same order as
# the two output streams: landmark_tensors, then hand_flag_tensor.
node {
  calculator: "SplitTfLiteTensorVectorCalculator"
  input_stream: "output_tensors"
  output_stream: "landmark_tensors"
  output_stream: "hand_flag_tensor"
  node_options: {
    [type.googleapis.com/mediapipe.SplitVectorCalculatorOptions] {
      ranges: { begin: 0 end: 1 }
      ranges: { begin: 1 end: 2 }
    }
  }
}
# Reads the hand-flag tensor and emits it as a single float: the confidence
# score that a hand is present.
node {
  calculator: "TfLiteTensorsToFloatsCalculator"
  input_stream: "TENSORS:hand_flag_tensor"
  output_stream: "FLOAT:hand_presence_score"
}
# Compares the presence confidence score against a threshold of 0.1 and emits
# the outcome as the hand_presence flag.
node {
  calculator: "ThresholdingCalculator"
  input_stream: "FLOAT:hand_presence_score"
  output_stream: "FLAG:hand_presence"
  node_options: {
    [type.googleapis.com/mediapipe.ThresholdingCalculatorOptions] {
      threshold: 0.1
    }
  }
}
# Decodes the landmark tensors into a vector of 21 landmarks. The landmark
# coordinates are normalized by the size of the model's input image
# (256x256, matching the output size of the image transformation node).
node {
  calculator: "TfLiteTensorsToLandmarksCalculator"
  input_stream: "TENSORS:landmark_tensors"
  output_stream: "NORM_LANDMARKS:landmarks"
  node_options: {
    [type.googleapis.com/mediapipe.TfLiteTensorsToLandmarksCalculatorOptions] {
      num_landmarks: 21
      input_image_width: 256
      input_image_height: 256
    }
  }
}
# Undoes the letterboxing: landmarks (already normalized to [0.f, 1.f]) that
# were located on the letterboxed hand image produced by the FIT-mode
# transformation are moved to the matching positions on the hand image as it
# was before that transformation, using the recorded letterbox padding.
node {
  calculator: "LandmarkLetterboxRemovalCalculator"
  input_stream: "LANDMARKS:landmarks"
  input_stream: "LETTERBOX_PADDING:letterbox_padding"
  output_stream: "LANDMARKS:scaled_landmarks"
}
# Maps the landmarks from the cropped hand image back onto the full,
# uncropped image that was fed into this graph, using the same hand_rect that
# drove the crop. The result is the subgraph's hand_landmarks output.
node {
  calculator: "LandmarkProjectionCalculator"
  input_stream: "NORM_LANDMARKS:scaled_landmarks"
  input_stream: "NORM_RECT:hand_rect"
  output_stream: "NORM_LANDMARKS:hand_landmarks"
}
# Publishes the size of each input frame on image_size; the two
# rectangle-producing nodes below take it as their IMAGE_SIZE input.
node {
  calculator: "ImagePropertiesCalculator"
  input_stream: "IMAGE_GPU:input_video"
  output_stream: "SIZE:image_size"
}
# Builds a single detection that tightly bounds the full set of projected
# hand landmarks.
node {
  calculator: "LandmarksToDetectionCalculator"
  input_stream: "NORM_LANDMARKS:hand_landmarks"
  output_stream: "DETECTION:hand_detection"
}
# Turns the hand detection into a rectangle, normalized by image size, that
# encloses the hand. The rectangle is rotated so that the line from the
# center of the wrist (keypoint 0) to the MCP of the middle finger
# (keypoint 9) lines up with the rectangle's Y-axis (target angle of 90
# degrees).
node {
  calculator: "DetectionsToRectsCalculator"
  input_stream: "DETECTION:hand_detection"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "NORM_RECT:hand_rect_from_landmarks"
  node_options: {
    [type.googleapis.com/mediapipe.DetectionsToRectsCalculatorOptions] {
      rotation_vector_start_keypoint_index: 0  # Center of wrist.
      rotation_vector_end_keypoint_index: 9  # MCP of middle finger.
      rotation_vector_target_angle_degrees: 90
    }
  }
}
# Enlarges the landmark-derived hand rectangle by a factor of 1.6 along both
# axes so that it is still likely to contain the hand in the next video frame
# despite some motion. The result is the subgraph's hand_rect_for_next_frame
# output.
# NOTE(review): square_long presumably squares the rectangle using its longer
# side -- confirm against RectTransformationCalculatorOptions.
node {
  calculator: "RectTransformationCalculator"
  input_stream: "NORM_RECT:hand_rect_from_landmarks"
  input_stream: "IMAGE_SIZE:image_size"
  output_stream: "hand_rect_for_next_frame"
  node_options: {
    [type.googleapis.com/mediapipe.RectTransformationCalculatorOptions] {
      scale_x: 1.6
      scale_y: 1.6
      square_long: true
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment