import time
import torch
import numpy as np
import torchvision as tv
import cv2
from collections import defaultdict
from transformers import pipeline
from .utils import *
class GenderFaceFilter:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'faces': ('FACE',),
                'gender': (['man', 'woman'],)
            }
        }

    RETURN_TYPES = ('FACE', 'FACE')
    RETURN_NAMES = ('filtered', 'rest')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, faces, gender):
        filtered = []
        rest = []
        # Gender classifier; note the pipeline is (re)loaded on every call, on GPU 0.
        pipe = pipeline('image-classification', model='dima806/man_woman_face_image_detection', device=0)
        for face in faces:
            _, im = face.crop(224, 1.2)
            im = im.permute(0, 3, 1, 2)[0]
            im = tv.transforms.functional.resize(im, (224, 224))
            r = pipe(tv.transforms.functional.to_pil_image(im))
            idx = np.argmax([i['score'] for i in r])
            if r[idx]['label'] == gender:
                filtered.append(face)
            else:
                rest.append(face)
        return (filtered, rest)
class OrderedFaceFilter:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'faces': ('FACE',),
                'criteria': (['area'],),
                'order': (['descending', 'ascending'],),
                'take_start': ('INT', {'default': 0, 'min': 0, 'step': 1}),
                'take_count': ('INT', {'default': 1, 'min': 1, 'step': 1}),
            }
        }

    RETURN_TYPES = ('FACE', 'FACE')
    RETURN_NAMES = ('filtered', 'rest')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, faces, criteria, order, take_start, take_count):
        funs = {
            # Face objects carry no .w/.h attributes; derive the area from the bbox.
            'area': lambda face: abs(face.bbox[2] - face.bbox[0]) * abs(face.bbox[3] - face.bbox[1])
        }
        sorted_faces = sorted(faces, key=funs[criteria], reverse=order == 'descending')
        filtered = sorted_faces[take_start:take_start + take_count]
        rest = sorted_faces[:take_start] + sorted_faces[take_start + take_count:]
        return (filtered, rest)
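
# A hypothetical usage sketch of the slicing above: sorted by area descending,
# take_start=0 / take_count=2 keeps the two largest faces and routes the rest
# to the second output (`faces` is assumed to be an existing FACE list):
#
#   filtered, rest = OrderedFaceFilter().run(faces, 'area', 'descending', 0, 2)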
class DetectFaces:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'image': ('IMAGE',),
                'threshold': ('FLOAT', {'default': 0.5, 'min': 0.0, 'max': 1.0, 'step': 0.01}),
                'min_size': ('INT', {'default': 64, 'max': 512, 'step': 8}),
                'max_size': ('INT', {'default': 512, 'min': 512, 'step': 8}),
            },
            'optional': {
                'mask': ('MASK',),
                'scale_to_max_size': ('BOOLEAN', {'default': False})
            }
        }

    RETURN_TYPES = ('FACE', 'IMAGE')
    RETURN_NAMES = ('faces', 'image')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, image, threshold, min_size, max_size, mask=None, scale_to_max_size=False):
        faces = []
        is_downscaled = False
        # A single scale factor is used for both axes because of rounding;
        # a slight rounding error may still remain.
        scale = 1
        dImg = image
        if scale_to_max_size:
            print("Image shape", image.shape, image.dtype)
            max_side = max(image.shape[1], image.shape[2])
            scale = max_size / max_side
            if scale < 1.0:
                nh = round(image.shape[1] * scale)
                nw = round(image.shape[2] * scale)
                print("Scaling down image from %sx%s to %sx%s (%s) for detection" % (image.shape[1], image.shape[2], nh, nw, scale))
                is_downscaled = True
                delta = time.time()
                dImg = dImg.permute(0, 3, 1, 2)
                dImg = tv.transforms.functional.resize(dImg, (nh, nw))
                dImg = dImg.permute(0, 2, 3, 1)
                delta = time.time() - delta
                print("[?] Downsizing took %s secs" % delta)
            else:
                # The image already fits within max_size: no resize happened,
                # so the detection coordinates must not be rescaled either.
                scale = 1
        # TODO: also resize masks?
        masked = image
        if mask is not None:
            delta = time.time()
            masked = image * tv.transforms.functional.resize(1 - mask, image.shape[1:3])[..., None]
            delta = time.time() - delta
            print("[?] Apply mask took %s secs" % delta)
        delta = time.time()
        masked = (masked * 255).type(torch.uint8)
        maskedD = (dImg * 255).type(torch.uint8)  # TODO: optimize; both tensors are only needed when downscaling
        for i, (img, d_img) in enumerate(zip(masked, maskedD)):
            unfiltered_faces = detect_faces(img, threshold, d_img, scale)
            for face in unfiltered_faces:
                a, b, c, d = face.bbox
                h = abs(d - b)
                w = abs(c - a)
                # TODO: these checks should probably both use `and`; as written the
                # max-size test only rejects a face when *both* axes exceed max_size.
                if (is_downscaled or (h <= max_size or w <= max_size)) and (min_size <= h or min_size <= w):
                    face.image_idx = i
                    face.img = image[i]
                    faces.append(face)
                else:
                    print("Skip face %i with %ix%i" % (i, w, h))
        delta = time.time() - delta
        print("[?] Full detect face took %s secs" % delta)
        return (faces, image)
class CropFaces:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'faces': ('FACE',),
                'crop_size': ('INT', {'default': 512, 'min': 512, 'max': 1024, 'step': 128}),
                'crop_factor': ('FLOAT', {'default': 1.5, 'min': 1.0, 'max': 3, 'step': 0.1}),
                'mask_type': (mask_types,)
            }
        }

    RETURN_TYPES = ('IMAGE', 'MASK', 'WARP')
    RETURN_NAMES = ('crops', 'masks', 'warps')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, faces, crop_size, crop_factor, mask_type):
        if len(faces) == 0:
            # No faces detected: return empty placeholder outputs.
            empty_crop = torch.zeros((1, 512, 512, 3))
            empty_mask = torch.zeros((1, 512, 512))
            empty_warp = np.array([
                [1, 0, -512],
                [0, 1, -512],
            ], dtype=np.float32)
            return (empty_crop, empty_mask, [empty_warp])
        crops = []
        masks = []
        warps = []
        for face in faces:
            M, crop = face.crop(crop_size, crop_factor)
            mask = mask_crop(face, M, crop, mask_type)
            crops.append(np.array(crop[0]))
            masks.append(np.array(mask[0]))
            warps.append(M)
        crops = torch.from_numpy(np.array(crops)).type(torch.float32)
        masks = torch.from_numpy(np.array(masks)).type(torch.float32)
        return (crops, masks, warps)
class WarpFaceBack:
    RETURN_TYPES = ('IMAGE',)
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'images': ('IMAGE',),
                'face': ('FACE',),
                'crop': ('IMAGE',),
                'mask': ('MASK',),
                'warp': ('WARP',),
            },
            'optional': {
                'multi_images': ('BOOLEAN', {'default': False}),
            }
        }

    def run(self, images, face, crop, mask, warp, multi_images=False):
        # Group the per-face data by the index of the source image.
        groups = defaultdict(list)
        for f, c, m, w in zip(face, crop, mask, warp):
            groups[f.image_idx].append((f.img, c, m, w))
            print("Faceidx: ", f.image_idx)

        def _exec(image, values):
            crop, mask, warp = list(zip(*[x[1:] for x in values]))
            # Warp each face mask back into the full-image frame.
            warped_masks = [cv2.warpAffine(single_mask.numpy(),
                                           cv2.invertAffineTransform(single_warp),
                                           image.shape[1::-1])
                            for single_warp, single_mask in zip(warp, mask)]
            full_mask = np.add.reduce(warped_masks, axis=0)[..., None]
            # Mask-weighted average of the warped-back crops.
            swapped = np.add.reduce([
                cv2.warpAffine(single_crop.cpu().numpy(),
                               cv2.invertAffineTransform(single_warp),
                               image.shape[1::-1]
                               ) * single_mask[..., None]
                for single_crop, single_mask, single_warp in zip(crop, warped_masks, warp)
            ], axis=0) / np.maximum(1, full_mask)
            full_mask = np.minimum(1, full_mask)
            result = swapped + (1 - full_mask) * image.numpy()
            result = torch.from_numpy(result)
            return result

        results = []
        print("Warp back Images: ", images.shape)
        for i, image in enumerate(images):
            if i not in groups:
                print("No face found on", i)
                results.append(image)
            else:
                values = groups[i]
                if multi_images:
                    # TODO: batches as input are still wonky in this mode; for now
                    # it returns one result per found face plus the "empty" images.
                    for v in values:
                        results.append(_exec(image, (v,)))
                else:
                    results.append(_exec(image, values))
        results = torch.stack(results)
        return (results,)
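
# The blend computed by WarpFaceBack._exec, per output image: with warped-back
# crops c_k and their warped masks m_k,
#
#   out = sum_k(c_k * m_k) / max(1, sum_k m_k) + (1 - min(1, sum_k m_k)) * original
#
# i.e. overlapping faces are averaged by mask weight and unmasked pixels keep
# the original image.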
class MergeWarps:
    RETURN_TYPES = ('IMAGE', 'MASK', 'WARP')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'crop0': ('IMAGE',),
                'mask0': ('MASK',),
                'warp0': ('WARP',),
                'crop1': ('IMAGE',),
                'mask1': ('MASK',),
                'warp1': ('WARP',),
            }
        }

    def run(self, crop0, mask0, warp0, crop1, mask1, warp1):
        crops = torch.vstack((crop0, crop1))
        masks = torch.vstack((mask0, mask1))
        warps = warp0 + warp1
        return (crops, masks, warps)
class BiSeNetMask:
    RETURN_TYPES = ('MASK',)
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'crop': ('IMAGE',),
                'skin': ('BOOLEAN', {'default': True}),
                'left_brow': ('BOOLEAN', {'default': True}),
                'right_brow': ('BOOLEAN', {'default': True}),
                'left_eye': ('BOOLEAN', {'default': True}),
                'right_eye': ('BOOLEAN', {'default': True}),
                'eyeglasses': ('BOOLEAN', {'default': True}),
                'left_ear': ('BOOLEAN', {'default': True}),
                'right_ear': ('BOOLEAN', {'default': True}),
                'earring': ('BOOLEAN', {'default': True}),
                'nose': ('BOOLEAN', {'default': True}),
                'mouth': ('BOOLEAN', {'default': True}),
                'upper_lip': ('BOOLEAN', {'default': True}),
                'lower_lip': ('BOOLEAN', {'default': True}),
                'neck': ('BOOLEAN', {'default': False}),
                'necklace': ('BOOLEAN', {'default': False}),
                'cloth': ('BOOLEAN', {'default': False}),
                'hair': ('BOOLEAN', {'default': False}),
                'hat': ('BOOLEAN', {'default': False}),
            }
        }

    def run(self, crop, skin, left_brow, right_brow, left_eye, right_eye, eyeglasses,
            left_ear, right_ear, earring, nose, mouth, upper_lip, lower_lip,
            neck, necklace, cloth, hair, hat):
        masks = mask_BiSeNet(crop, skin, left_brow, right_brow, left_eye, right_eye, eyeglasses,
                             left_ear, right_ear, earring, nose, mouth, upper_lip, lower_lip,
                             neck, necklace, cloth, hair, hat)
        return (masks,)
class JonathandinuMask:
    RETURN_TYPES = ('MASK',)
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'crop': ('IMAGE',),
                'skin': ('BOOLEAN', {'default': True}),
                'nose': ('BOOLEAN', {'default': True}),
                'eyeglasses': ('BOOLEAN', {'default': False}),
                'left_eye': ('BOOLEAN', {'default': True}),
                'right_eye': ('BOOLEAN', {'default': True}),
                'left_brow': ('BOOLEAN', {'default': True}),
                'right_brow': ('BOOLEAN', {'default': True}),
                'left_ear': ('BOOLEAN', {'default': True}),
                'right_ear': ('BOOLEAN', {'default': True}),
                'mouth': ('BOOLEAN', {'default': True}),
                'upper_lip': ('BOOLEAN', {'default': True}),
                'lower_lip': ('BOOLEAN', {'default': True}),
                'hair': ('BOOLEAN', {'default': False}),
                'hat': ('BOOLEAN', {'default': False}),
                'earring': ('BOOLEAN', {'default': False}),
                'necklace': ('BOOLEAN', {'default': False}),
                'neck': ('BOOLEAN', {'default': False}),
                'cloth': ('BOOLEAN', {'default': False}),
            }
        }

    def run(self, crop, skin, nose, eyeglasses, left_eye, right_eye, left_brow, right_brow, left_ear, right_ear,
            mouth, upper_lip, lower_lip, hair, hat, earring, necklace, neck, cloth):
        masks = mask_jonathandinu(crop, skin, nose, eyeglasses, left_eye, right_eye, left_brow, right_brow, left_ear, right_ear,
                                  mouth, upper_lip, lower_lip, hair, hat, earring, necklace, neck, cloth)
        return (masks,)
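
# ComfyUI discovers custom nodes through the two module-level dicts below:
# NODE_CLASS_MAPPINGS maps a unique node key to its class, and
# NODE_DISPLAY_NAME_MAPPINGS supplies the label shown in the editor UI.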
NODE_CLASS_MAPPINGS = {
    'DetectFaces': DetectFaces,
    'CropFaces': CropFaces,
    'WarpFacesBack': WarpFaceBack,
    'BiSeNetMask': BiSeNetMask,
    'JonathandinuMask': JonathandinuMask,
    'MergeWarps': MergeWarps,
    'GenderFaceFilter': GenderFaceFilter,
    'OrderedFaceFilter': OrderedFaceFilter,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    'DetectFaces': 'DetectFaces',
    'CropFaces': 'CropFaces',
    'WarpFacesBack': 'Warp Faces Back',
    'BiSeNetMask': 'BiSeNet Mask',
    'JonathandinuMask': 'Jonathandinu Mask',
    'MergeWarps': 'Merge Warps',
    'GenderFaceFilter': 'Gender Face Filter',
    'OrderedFaceFilter': 'Ordered Face Filter',
}

# ---------------------------------------------------------------------------
# utils.py (the module imported above via `from .utils import *`)
# ---------------------------------------------------------------------------
import os
import time
import torch
import torchvision as tv
import numpy as np
import cv2
import mediapipe as mp
from scipy.spatial import ConvexHull
from folder_paths import models_dir
from .BiSeNet import BiSeNet
from ultralytics import YOLO
from onnxruntime import InferenceSession
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from skimage import transform as trans

# Reference 5-point landmark template used for ArcFace alignment (112x112).
arcface_dst = np.array(
    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
     [41.5493, 92.3655], [70.7299, 92.2041]],
    dtype=np.float32)
def estimate_norm(lmk, image_size=112, mode='arcface'):
    assert lmk.shape == (5, 2)
    assert image_size % 112 == 0 or image_size % 128 == 0
    if image_size % 112 == 0:
        ratio = float(image_size) / 112.0
        diff_x = 0
    else:
        ratio = float(image_size) / 128.0
        diff_x = 8.0 * ratio
    dst = arcface_dst * ratio
    dst[:, 0] += diff_x
    tform = trans.SimilarityTransform()
    tform.estimate(lmk, dst)
    M = tform.params[0:2, :]
    return M
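
# A minimal usage sketch (illustrative landmark values, not from a real
# detector): estimate_norm fits a similarity transform from five keypoints
# (eye centers, nose tip, mouth corners) onto the ArcFace template, and the
# resulting 2x3 matrix feeds cv2.warpAffine. `img` is assumed to be an
# HxWx3 numpy image:
#
#   lmk = np.array([[38., 52.], [74., 52.], [56., 72.],
#                   [42., 92.], [71., 92.]], dtype=np.float32)
#   M = estimate_norm(lmk, image_size=112)
#   aligned = cv2.warpAffine(img, M, (112, 112))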
def pad_to_stride(image, stride=32):
    # Pad right/bottom so both sides are multiples of `stride`.
    h, w, _ = image.shape
    pr = (stride - w % stride) % stride
    pb = (stride - h % stride) % stride
    padded_image = tv.transforms.functional.pad(image.permute(2, 0, 1), (0, 0, pr, pb)).permute(1, 2, 0)
    return padded_image

def resize(img, size):
    # Letterbox to a square of side `size`; also return the scale and paddings.
    h, w, _ = img.shape
    s = max(h, w)
    scale_factor = s / size
    ph, pw = (s - h) // 2, (s - w) // 2
    pad = tv.transforms.Pad((pw, ph))
    resize_t = tv.transforms.Resize(size=(size, size), antialias=True)
    img = resize_t(pad(img.permute(2, 0, 1))).permute(1, 2, 0)
    return img, scale_factor, ph, pw
class Models:
    @classmethod
    def yolo(cls, img, threshold):
        # Lazily load the YOLO face detector on first use.
        if '_yolo' not in cls.__dict__:
            cls._yolo = YOLO(os.path.join(models_dir, 'ultralytics', 'bbox', 'face_yolov8m.pt'))
            print("YOLO:", cls._yolo.device, cls._yolo.device.type)
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            cls._yolo = cls._yolo.to(device)
            print("YOLO:", cls._yolo.device, cls._yolo.device.type)
        dets = cls._yolo(img, conf=threshold)[0]
        return dets

    @classmethod
    def lmk(cls, crop):
        # Lazily load the 68-point landmark ONNX model on first use.
        if '_lmk' not in cls.__dict__:
            cls._lmk = InferenceSession(os.path.join(models_dir, 'landmarks', 'fan2_68_landmark.onnx'))
        lmk = cls._lmk.run(None, {'input': crop})[0]
        return lmk
def get_submatrix_with_padding(img, a, b, c, d):
    # Clamp the box to the image and zero-pad whatever falls outside.
    pl = -min(a, 0)
    pt = -min(b, 0)
    pr = -min(img.shape[1] - c, 0)
    pb = -min(img.shape[0] - d, 0)
    a, b, c, d = max(a, 0), max(b, 0), min(c, img.shape[1]), min(d, img.shape[0])
    submatrix = img[b:d, a:c].permute(2, 0, 1)
    pad = tv.transforms.Pad((pl, pt, pr, pb))
    submatrix = pad(submatrix).permute(1, 2, 0)
    return submatrix
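
# Example: a box reaching past the top-left corner is clamped to the image and
# the missing region is zero-padded, so the output always has the requested
# (d - b) x (c - a) size:
#
#   sub = get_submatrix_with_padding(img, -10, -10, 100, 100)  # 110x110 crop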
class Face:
    def __init__(self, img, a, b, c, d, scale) -> None:
        self.img = img
        self.scale = scale
        lmk = None
        best_score = 0
        i = 0
        crop = get_submatrix_with_padding(self.img, a, b, c, d)
        # Try all four 90-degree rotations and keep the landmark set with the
        # highest mean confidence, which handles rotated faces.
        for curr_i in range(4):
            rcrop, s, ph, pw = resize(crop.rot90(curr_i), 256)
            rcrop = (rcrop[None] / 255).permute(0, 3, 1, 2).type(torch.float32).numpy()
            curr_lmk = Models.lmk(rcrop)
            score = np.mean(curr_lmk[0, :, 2])
            if score > best_score:
                best_score = score
                lmk = curr_lmk
                i = curr_i
        self.bbox = (a, b, c, d)
        self.confidence = best_score
        # Reduce the 68 landmarks to 5 ArcFace keypoints (eye centers, nose
        # tip, mouth corners); `* 4 * s` maps coordinates from the model's
        # quarter-resolution output back to the original crop size.
        self.kps = np.vstack([
            lmk[0, [37, 38, 40, 41], :2].mean(axis=0),
            lmk[0, [43, 44, 46, 47], :2].mean(axis=0),
            lmk[0, [30, 48, 54], :2]
        ]) * 4 * s
        self.T2 = np.array([[1, 0, -a], [0, 1, -b], [0, 0, 1]])
        rot = cv2.getRotationMatrix2D((128 * s, 128 * s), 90 * i, 1)
        self.R = np.vstack((rot, np.array((0, 0, 1))))

    def crop(self, size, crop_factor):
        # Compose: translate into bbox space (T2), undo the rotation (R),
        # align to the ArcFace template (M), then zoom out by crop_factor
        # about the crop center (T4 @ S @ T3).
        S = np.array([[1 / crop_factor, 0, 0], [0, 1 / crop_factor, 0], [0, 0, 1]])
        M = estimate_norm(self.kps, size)
        N = M @ self.R @ self.T2
        cx, cy = np.array((size / 2, size / 2, 1)) @ cv2.invertAffineTransform(M @ self.R @ self.T2).T
        T3 = np.array([[1, 0, -cx], [0, 1, -cy], [0, 0, 1]])
        T4 = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
        N = N @ T4 @ S @ T3
        crop = cv2.warpAffine(self.img.numpy(), N, (size, size))
        crop = torch.from_numpy(crop)[None]
        return N, crop
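
# Round trip used by the nodes above: Face.crop returns the 2x3 warp N along
# with the aligned crop, and inverting N maps (edited) crop pixels back into
# the original frame, which is exactly what WarpFaceBack does. E.g., with an
# existing `face`:
#
#   N, crop = face.crop(512, 1.5)
#   restored = cv2.warpAffine(crop[0].numpy(), cv2.invertAffineTransform(N),
#                             face.img.shape[1::-1])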
def detect_faces(img, threshold, dImg, scale):
    img = pad_to_stride(img, stride=32)
    # TODO: the padding may be incorrect now that detection can run on a
    # downscaled copy.
    dImg = pad_to_stride(dImg, stride=32)
    delta = time.time()
    dets = Models.yolo((dImg[None] / 255).permute(0, 3, 1, 2), threshold)
    delta = time.time() - delta
    print("[?] YOLO ran in %s" % delta)
    boxes = (dets.boxes.xyxy.reshape(-1, 2, 2)).reshape(-1, 4)
    faces = []
    delta = time.time()
    for (a, b, c, d), box in zip(boxes.type(torch.int).cpu().numpy(), dets.boxes):
        # Expand each detection to a square around its center, then map the
        # coordinates back to the full-resolution image.
        cx, cy = (a + c) / 2, (b + d) / 2
        r = np.sqrt((c - a) ** 2 + (d - b) ** 2) / 2
        a, b, c, d = [int(x * (1.0 / scale)) for x in (cx - r, cy - r, cx + r, cy + r)]
        face = Face(img, a, b, c, d, scale)
        faces.append(face)
    delta = time.time() - delta
    print("[?] Alignment took %s" % delta)
    return faces
def get_face_mesh(crop: torch.Tensor):
    # Run MediaPipe face mesh and return the landmark set closest to the
    # crop center, or None if no face was found.
    with mp.solutions.face_mesh.FaceMesh(max_num_faces=10) as face_mesh:
        mesh = face_mesh.process(crop.mul(255).type(torch.uint8)[0].numpy())
        _, h, w, _ = crop.shape
        if mesh.multi_face_landmarks is not None:
            all_pts = np.array([np.array([(w * l.x, h * l.y) for l in lmks.landmark]) for lmks in mesh.multi_face_landmarks], dtype=np.int32)
            idx = np.argmin(np.abs(all_pts - np.array([w / 2, h / 2])).sum(axis=(1, 2)))
            points = all_pts[idx]
            return points
        else:
            return None
def mask_simple_square(face, M, crop):
    # Rotated bbox and size.
    h, w = crop.shape[1:3]
    a, b, c, d = face.bbox
    rect = np.array([
        [a, b, 1],
        [a, d, 1],
        [c, b, 1],
        [c, d, 1],
    ]) @ M.T
    lx, ly = [int(x) for x in np.min(rect, axis=0)]
    hx, hy = [int(x) for x in np.max(rect, axis=0)]
    mask = np.zeros((h, w), dtype=np.float32)
    mask = cv2.rectangle(mask, (lx, ly), (hx, hy), 1, -1)
    mask = torch.from_numpy(mask)[None]
    return mask
def mask_convex_hull(face, M, crop):
    h, w = crop.shape[1:3]
    points = get_face_mesh(crop)
    if points is None:
        # Fall back to the bbox mask when no mesh was found.
        return mask_simple_square(face, M, crop)
    hull = ConvexHull(points)
    mask = np.zeros((h, w), dtype=np.int32)
    cv2.fillPoly(mask, [points[hull.vertices, :]], color=1)
    mask = mask.astype(np.float32)
    mask = torch.from_numpy(mask[None])
    return mask
def mask_BiSeNet(crop,
                 skin=True,
                 l_brow=True,
                 r_brow=True,
                 l_eye=True,
                 r_eye=True,
                 eye_g=True,
                 l_ear=True,
                 r_ear=True,
                 ear_r=True,
                 nose=True,
                 mouth=True,
                 u_lip=True,
                 l_lip=True,
                 neck=False,
                 neck_l=False,
                 cloth=False,
                 hair=False,
                 hat=False,
                 ):
    with torch.no_grad():
        # Note: the BiSeNet weights are reloaded on every call.
        bisenet = BiSeNet(n_classes=19)
        bisenet.cuda()
        model_path = os.path.join(models_dir, 'bisenet', '79999_iter.pth')
        bisenet.load_state_dict(torch.load(model_path))
        bisenet.eval()
        crop_t = crop.permute(0, 3, 1, 2).cuda().float()
        segms_t = bisenet(crop_t)[0].argmax(1).float()
        dic = {
            'skin': 1,
            'l_brow': 2,
            'r_brow': 3,
            'l_eye': 4,
            'r_eye': 5,
            'eye_g': 6,
            'l_ear': 7,
            'r_ear': 8,
            'ear_r': 9,
            'nose': 10,
            'mouth': 11,
            'u_lip': 12,
            'l_lip': 13,
            'neck': 14,
            'neck_l': 15,
            'cloth': 16,
            'hair': 17,
            'hat': 18,
        }
        # The keyword arguments mirror the keys of `dic`, so scanning locals()
        # collects the class ids of every part the caller enabled.
        keep = []
        for k, v in locals().items():
            if k in dic and v:
                keep.append(dic[k])
        face_part_ids = torch.tensor(keep).cuda()
        # Union of the selected classes as a float mask.
        segms_t = torch.sum(segms_t.repeat(len(face_part_ids), 1, 1, 1) == face_part_ids[..., None, None, None], axis=0).float()
        mask = segms_t.cpu()
    return mask
def mask_jonathandinu(crop, skin=True, nose=True, eye_g=True, l_eye=True, r_eye=True, l_brow=True, r_brow=True,
                      l_ear=True, r_ear=True, mouth=True, u_lip=True, l_lip=True,
                      hair=False, hat=False, ear_r=False, neck_l=False, neck=False, cloth=False):
    global jonathandinu_image_processor, jonathandinu_model
    # Prefer CUDA (NVIDIA/AMD), then MPS (Apple Silicon), then CPU.
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )
    # Load the Segformer face-parsing model once and cache it globally.
    if 'jonathandinu_image_processor' not in globals():
        jonathandinu_image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
        jonathandinu_model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
        jonathandinu_model.to(device)
    inputs = jonathandinu_image_processor(images=crop.mul(255).type(torch.uint8), return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = jonathandinu_model(**inputs)
    logits = outputs.logits  # shape (batch_size, num_labels, ~height/4, ~width/4)
    # Resize the output to match the input image dimensions.
    upsampled_logits = tv.transforms.functional.resize(logits, crop.shape[1:3], antialias=True)
    labels = upsampled_logits.argmax(dim=1)
    ids = {
        'skin': 1,
        'nose': 2,
        'eye_g': 3,
        'l_eye': 4,
        'r_eye': 5,
        'l_brow': 6,
        'r_brow': 7,
        'l_ear': 8,
        'r_ear': 9,
        'mouth': 10,
        'u_lip': 11,
        'l_lip': 12,
        'hair': 13,
        'hat': 14,
        'ear_r': 15,
        'neck_l': 16,
        'neck': 17,
        'cloth': 18,
    }
    # Same locals() trick as mask_BiSeNet: collect the ids of enabled parts.
    keep = []
    for k, v in locals().items():
        if k in ids and v:
            keep.append(ids[k])
    # Use the selected device; a hard-coded .cuda() would break on MPS/CPU.
    face_part_ids = torch.tensor(keep).to(device)
    mask = torch.sum(labels.repeat(len(face_part_ids), 1, 1, 1) == face_part_ids[..., None, None, None], axis=0).float().cpu()
    return mask
mask_types = [
    'simple_square',
    'convex_hull',
    'BiSeNet',
    'jonathandinu',
    # 'clean BiSeNet',
]

mask_funs = {
    'simple_square': mask_simple_square,
    'convex_hull': mask_convex_hull,
    'BiSeNet': lambda face, M, crop: mask_BiSeNet(crop),
    'jonathandinu': lambda face, M, crop: mask_jonathandinu(crop),
    # 'clean BiSeNet': mask_clean_BiSeNet,
}

def mask_crop(face, M, crop, mask_type):
    mask = mask_funs[mask_type](face, M, crop)
    return mask