import time
import torch
import numpy as np
import torchvision as tv
import cv2
from collections import defaultdict
from transformers import pipeline
from .utils import *
class GenderFaceFilter:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'faces': ('FACE',),
                'gender': (['man', 'woman'],)
            }
        }

    RETURN_TYPES = ('FACE', 'FACE')
    RETURN_NAMES = ('filtered', 'rest')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, faces, gender):
        filtered = []
        rest = []
        # Gender classifier; note the pipeline is (re)loaded on every call, on GPU 0.
        pipe = pipeline('image-classification', model='dima806/man_woman_face_image_detection', device=0)
        for face in faces:
            _, im = face.crop(224, 1.2)
            im = im.permute(0, 3, 1, 2)[0]
            im = tv.transforms.functional.resize(im, (224, 224))
            r = pipe(tv.transforms.functional.to_pil_image(im))
            idx = np.argmax([i['score'] for i in r])
            if r[idx]['label'] == gender:
                filtered.append(face)
            else:
                rest.append(face)
        return (filtered, rest)
class OrderedFaceFilter:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'faces': ('FACE',),
                'criteria': (['area'],),
                'order': (['descending', 'ascending'],),
                'take_start': ('INT', {'default': 0, 'min': 0, 'step': 1}),
                'take_count': ('INT', {'default': 1, 'min': 1, 'step': 1}),
            }
        }

    RETURN_TYPES = ('FACE', 'FACE')
    RETURN_NAMES = ('filtered', 'rest')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, faces, criteria, order, take_start, take_count):
        funs = {
            # Face objects carry no .w/.h attributes; derive the area from the bbox.
            'area': lambda face: abs(face.bbox[2] - face.bbox[0]) * abs(face.bbox[3] - face.bbox[1])
        }
        sorted_faces = sorted(faces, key=funs[criteria], reverse=order == 'descending')
        filtered = sorted_faces[take_start:take_start + take_count]
        rest = sorted_faces[:take_start] + sorted_faces[take_start + take_count:]
        return (filtered, rest)
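
# A hypothetical usage sketch of the slicing above: sorted by area descending,
# take_start=0 / take_count=2 keeps the two largest faces and routes the rest
# to the second output (`faces` is assumed to be an existing FACE list):
#
#   filtered, rest = OrderedFaceFilter().run(faces, 'area', 'descending', 0, 2)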
class DetectFaces:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'image': ('IMAGE',),
                'threshold': ('FLOAT', {'default': 0.5, 'min': 0.0, 'max': 1.0, 'step': 0.01}),
                'min_size': ('INT', {'default': 64, 'max': 512, 'step': 8}),
                'max_size': ('INT', {'default': 512, 'min': 512, 'step': 8}),
            },
            'optional': {
                'mask': ('MASK',),
                'scale_to_max_size': ('BOOLEAN', {'default': False})
            }
        }

    RETURN_TYPES = ('FACE', 'IMAGE')
    RETURN_NAMES = ('faces', 'image')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, image, threshold, min_size, max_size, mask=None, scale_to_max_size=False):
        faces = []
        is_downscaled = False
        # A single scale factor is used for both axes because of rounding;
        # a slight rounding error may still remain.
        scale = 1
        dImg = image
        if scale_to_max_size:
            print("Image shape", image.shape, image.dtype)
            max_side = max(image.shape[1], image.shape[2])
            scale = max_size / max_side
            if scale < 1.0:
                nh = round(image.shape[1] * scale)
                nw = round(image.shape[2] * scale)
                print("Scaling down image from %sx%s to %sx%s (%s) for detection" % (image.shape[1], image.shape[2], nh, nw, scale))
                is_downscaled = True
                delta = time.time()
                dImg = dImg.permute(0, 3, 1, 2)
                dImg = tv.transforms.functional.resize(dImg, (nh, nw))
                dImg = dImg.permute(0, 2, 3, 1)
                delta = time.time() - delta
                print("[?] Downsizing took %s secs" % delta)
            else:
                # The image already fits within max_size: no resize happened,
                # so the detection coordinates must not be rescaled either.
                scale = 1
        # TODO: also resize masks?
        masked = image
        if mask is not None:
            delta = time.time()
            masked = image * tv.transforms.functional.resize(1 - mask, image.shape[1:3])[..., None]
            delta = time.time() - delta
            print("[?] Apply mask took %s secs" % delta)
        delta = time.time()
        masked = (masked * 255).type(torch.uint8)
        maskedD = (dImg * 255).type(torch.uint8)  # TODO: optimize; both tensors are only needed when downscaling
        for i, (img, d_img) in enumerate(zip(masked, maskedD)):
            unfiltered_faces = detect_faces(img, threshold, d_img, scale)
            for face in unfiltered_faces:
                a, b, c, d = face.bbox
                h = abs(d - b)
                w = abs(c - a)
                # TODO: these checks should probably both use `and`; as written the
                # max-size test only rejects a face when *both* axes exceed max_size.
                if (is_downscaled or (h <= max_size or w <= max_size)) and (min_size <= h or min_size <= w):
                    face.image_idx = i
                    face.img = image[i]
                    faces.append(face)
                else:
                    print("Skip face %i with %ix%i" % (i, w, h))
        delta = time.time() - delta
        print("[?] Full detect face took %s secs" % delta)
        return (faces, image)
class CropFaces:
    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'faces': ('FACE',),
                'crop_size': ('INT', {'default': 512, 'min': 512, 'max': 1024, 'step': 128}),
                'crop_factor': ('FLOAT', {'default': 1.5, 'min': 1.0, 'max': 3, 'step': 0.1}),
                'mask_type': (mask_types,)
            }
        }

    RETURN_TYPES = ('IMAGE', 'MASK', 'WARP')
    RETURN_NAMES = ('crops', 'masks', 'warps')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    def run(self, faces, crop_size, crop_factor, mask_type):
        if len(faces) == 0:
            # No faces detected: return empty placeholder outputs.
            empty_crop = torch.zeros((1, 512, 512, 3))
            empty_mask = torch.zeros((1, 512, 512))
            empty_warp = np.array([
                [1, 0, -512],
                [0, 1, -512],
            ], dtype=np.float32)
            return (empty_crop, empty_mask, [empty_warp])
        crops = []
        masks = []
        warps = []
        for face in faces:
            M, crop = face.crop(crop_size, crop_factor)
            mask = mask_crop(face, M, crop, mask_type)
            crops.append(np.array(crop[0]))
            masks.append(np.array(mask[0]))
            warps.append(M)
        crops = torch.from_numpy(np.array(crops)).type(torch.float32)
        masks = torch.from_numpy(np.array(masks)).type(torch.float32)
        return (crops, masks, warps)
class WarpFaceBack:
    RETURN_TYPES = ('IMAGE',)
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'images': ('IMAGE',),
                'face': ('FACE',),
                'crop': ('IMAGE',),
                'mask': ('MASK',),
                'warp': ('WARP',),
            },
            'optional': {
                'multi_images': ('BOOLEAN', {'default': False}),
            }
        }

    def run(self, images, face, crop, mask, warp, multi_images=False):
        # Group the per-face data by the index of the source image.
        groups = defaultdict(list)
        for f, c, m, w in zip(face, crop, mask, warp):
            groups[f.image_idx].append((f.img, c, m, w))
            print("Faceidx: ", f.image_idx)

        def _exec(image, values):
            crop, mask, warp = list(zip(*[x[1:] for x in values]))
            # Warp each face mask back into the full-image frame.
            warped_masks = [cv2.warpAffine(single_mask.numpy(),
                                           cv2.invertAffineTransform(single_warp),
                                           image.shape[1::-1])
                            for single_warp, single_mask in zip(warp, mask)]
            full_mask = np.add.reduce(warped_masks, axis=0)[..., None]
            # Mask-weighted average of the warped-back crops.
            swapped = np.add.reduce([
                cv2.warpAffine(single_crop.cpu().numpy(),
                               cv2.invertAffineTransform(single_warp),
                               image.shape[1::-1]
                               ) * single_mask[..., None]
                for single_crop, single_mask, single_warp in zip(crop, warped_masks, warp)
            ], axis=0) / np.maximum(1, full_mask)
            full_mask = np.minimum(1, full_mask)
            result = swapped + (1 - full_mask) * image.numpy()
            result = torch.from_numpy(result)
            return result

        results = []
        print("Warp back Images: ", images.shape)
        for i, image in enumerate(images):
            if i not in groups:
                print("No face found on", i)
                results.append(image)
            else:
                values = groups[i]
                if multi_images:
                    # TODO: batches as input are still wonky in this mode; for now
                    # it returns one result per found face plus the "empty" images.
                    for v in values:
                        results.append(_exec(image, (v,)))
                else:
                    results.append(_exec(image, values))
        results = torch.stack(results)
        return (results,)
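
# The blend computed by WarpFaceBack._exec, per output image: with warped-back
# crops c_k and their warped masks m_k,
#
#   out = sum_k(c_k * m_k) / max(1, sum_k m_k) + (1 - min(1, sum_k m_k)) * original
#
# i.e. overlapping faces are averaged by mask weight and unmasked pixels keep
# the original image.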
class MergeWarps:
    RETURN_TYPES = ('IMAGE', 'MASK', 'WARP')
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'crop0': ('IMAGE',),
                'mask0': ('MASK',),
                'warp0': ('WARP',),
                'crop1': ('IMAGE',),
                'mask1': ('MASK',),
                'warp1': ('WARP',),
            }
        }

    def run(self, crop0, mask0, warp0, crop1, mask1, warp1):
        crops = torch.vstack((crop0, crop1))
        masks = torch.vstack((mask0, mask1))
        warps = warp0 + warp1
        return (crops, masks, warps)
class BiSeNetMask:
    RETURN_TYPES = ('MASK',)
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'crop': ('IMAGE',),
                'skin': ('BOOLEAN', {'default': True}),
                'left_brow': ('BOOLEAN', {'default': True}),
                'right_brow': ('BOOLEAN', {'default': True}),
                'left_eye': ('BOOLEAN', {'default': True}),
                'right_eye': ('BOOLEAN', {'default': True}),
                'eyeglasses': ('BOOLEAN', {'default': True}),
                'left_ear': ('BOOLEAN', {'default': True}),
                'right_ear': ('BOOLEAN', {'default': True}),
                'earring': ('BOOLEAN', {'default': True}),
                'nose': ('BOOLEAN', {'default': True}),
                'mouth': ('BOOLEAN', {'default': True}),
                'upper_lip': ('BOOLEAN', {'default': True}),
                'lower_lip': ('BOOLEAN', {'default': True}),
                'neck': ('BOOLEAN', {'default': False}),
                'necklace': ('BOOLEAN', {'default': False}),
                'cloth': ('BOOLEAN', {'default': False}),
                'hair': ('BOOLEAN', {'default': False}),
                'hat': ('BOOLEAN', {'default': False}),
            }
        }

    def run(self, crop, skin, left_brow, right_brow, left_eye, right_eye, eyeglasses,
            left_ear, right_ear, earring, nose, mouth, upper_lip, lower_lip,
            neck, necklace, cloth, hair, hat):
        masks = mask_BiSeNet(crop, skin, left_brow, right_brow, left_eye, right_eye, eyeglasses,
                             left_ear, right_ear, earring, nose, mouth, upper_lip, lower_lip,
                             neck, necklace, cloth, hair, hat)
        return (masks,)
class JonathandinuMask:
    RETURN_TYPES = ('MASK',)
    FUNCTION = 'run'
    CATEGORY = 'facetools'

    @classmethod
    def INPUT_TYPES(cls):
        return {
            'required': {
                'crop': ('IMAGE',),
                'skin': ('BOOLEAN', {'default': True}),
                'nose': ('BOOLEAN', {'default': True}),
                'eyeglasses': ('BOOLEAN', {'default': False}),
                'left_eye': ('BOOLEAN', {'default': True}),
                'right_eye': ('BOOLEAN', {'default': True}),
                'left_brow': ('BOOLEAN', {'default': True}),
                'right_brow': ('BOOLEAN', {'default': True}),
                'left_ear': ('BOOLEAN', {'default': True}),
                'right_ear': ('BOOLEAN', {'default': True}),
                'mouth': ('BOOLEAN', {'default': True}),
                'upper_lip': ('BOOLEAN', {'default': True}),
                'lower_lip': ('BOOLEAN', {'default': True}),
                'hair': ('BOOLEAN', {'default': False}),
                'hat': ('BOOLEAN', {'default': False}),
                'earring': ('BOOLEAN', {'default': False}),
                'necklace': ('BOOLEAN', {'default': False}),
                'neck': ('BOOLEAN', {'default': False}),
                'cloth': ('BOOLEAN', {'default': False}),
            }
        }

    def run(self, crop, skin, nose, eyeglasses, left_eye, right_eye, left_brow, right_brow, left_ear, right_ear,
            mouth, upper_lip, lower_lip, hair, hat, earring, necklace, neck, cloth):
        masks = mask_jonathandinu(crop, skin, nose, eyeglasses, left_eye, right_eye, left_brow, right_brow, left_ear, right_ear,
                                  mouth, upper_lip, lower_lip, hair, hat, earring, necklace, neck, cloth)
        return (masks,)
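
# ComfyUI discovers custom nodes through the two module-level dicts below:
# NODE_CLASS_MAPPINGS maps a unique node key to its class, and
# NODE_DISPLAY_NAME_MAPPINGS supplies the label shown in the editor UI.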
NODE_CLASS_MAPPINGS = {
    'DetectFaces': DetectFaces,
    'CropFaces': CropFaces,
    'WarpFacesBack': WarpFaceBack,
    'BiSeNetMask': BiSeNetMask,
    'JonathandinuMask': JonathandinuMask,
    'MergeWarps': MergeWarps,
    'GenderFaceFilter': GenderFaceFilter,
    'OrderedFaceFilter': OrderedFaceFilter,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    'DetectFaces': 'DetectFaces',
    'CropFaces': 'CropFaces',
    'WarpFacesBack': 'Warp Faces Back',
    'BiSeNetMask': 'BiSeNet Mask',
    'JonathandinuMask': 'Jonathandinu Mask',
    'MergeWarps': 'Merge Warps',
    'GenderFaceFilter': 'Gender Face Filter',
    'OrderedFaceFilter': 'Ordered Face Filter',
}

# ---------------------------------------------------------------------------
# utils.py (the module imported above via `from .utils import *`)
# ---------------------------------------------------------------------------
import os
import time
import torch
import torchvision as tv
import numpy as np
import cv2
import mediapipe as mp
from scipy.spatial import ConvexHull
from folder_paths import models_dir
from .BiSeNet import BiSeNet
from ultralytics import YOLO
from onnxruntime import InferenceSession
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation
from skimage import transform as trans

# Reference 5-point landmark template used for ArcFace alignment (112x112).
arcface_dst = np.array(
    [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
     [41.5493, 92.3655], [70.7299, 92.2041]],
    dtype=np.float32)
def estimate_norm(lmk, image_size=112, mode='arcface'):
    assert lmk.shape == (5, 2)
    assert image_size % 112 == 0 or image_size % 128 == 0
    if image_size % 112 == 0:
        ratio = float(image_size) / 112.0
        diff_x = 0
    else:
        ratio = float(image_size) / 128.0
        diff_x = 8.0 * ratio
    dst = arcface_dst * ratio
    dst[:, 0] += diff_x
    tform = trans.SimilarityTransform()
    tform.estimate(lmk, dst)
    M = tform.params[0:2, :]
    return M
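
# A minimal usage sketch (illustrative landmark values, not from a real
# detector): estimate_norm fits a similarity transform from five keypoints
# (eye centers, nose tip, mouth corners) onto the ArcFace template, and the
# resulting 2x3 matrix feeds cv2.warpAffine. `img` is assumed to be an
# HxWx3 numpy image:
#
#   lmk = np.array([[38., 52.], [74., 52.], [56., 72.],
#                   [42., 92.], [71., 92.]], dtype=np.float32)
#   M = estimate_norm(lmk, image_size=112)
#   aligned = cv2.warpAffine(img, M, (112, 112))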
def pad_to_stride(image, stride=32):
    # Pad right/bottom so both sides are multiples of `stride`.
    h, w, _ = image.shape
    pr = (stride - w % stride) % stride
    pb = (stride - h % stride) % stride
    padded_image = tv.transforms.functional.pad(image.permute(2, 0, 1), (0, 0, pr, pb)).permute(1, 2, 0)
    return padded_image

def resize(img, size):
    # Letterbox to a square of side `size`; also return the scale and paddings.
    h, w, _ = img.shape
    s = max(h, w)
    scale_factor = s / size
    ph, pw = (s - h) // 2, (s - w) // 2
    pad = tv.transforms.Pad((pw, ph))
    resize_t = tv.transforms.Resize(size=(size, size), antialias=True)
    img = resize_t(pad(img.permute(2, 0, 1))).permute(1, 2, 0)
    return img, scale_factor, ph, pw
class Models:
    @classmethod
    def yolo(cls, img, threshold):
        # Lazily load the YOLO face detector on first use.
        if '_yolo' not in cls.__dict__:
            cls._yolo = YOLO(os.path.join(models_dir, 'ultralytics', 'bbox', 'face_yolov8m.pt'))
            print("YOLO:", cls._yolo.device, cls._yolo.device.type)
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            cls._yolo = cls._yolo.to(device)
            print("YOLO:", cls._yolo.device, cls._yolo.device.type)
        dets = cls._yolo(img, conf=threshold)[0]
        return dets

    @classmethod
    def lmk(cls, crop):
        # Lazily load the 68-point landmark ONNX model on first use.
        if '_lmk' not in cls.__dict__:
            cls._lmk = InferenceSession(os.path.join(models_dir, 'landmarks', 'fan2_68_landmark.onnx'))
        lmk = cls._lmk.run(None, {'input': crop})[0]
        return lmk
def get_submatrix_with_padding(img, a, b, c, d):
    # Clamp the box to the image and zero-pad whatever falls outside.
    pl = -min(a, 0)
    pt = -min(b, 0)
    pr = -min(img.shape[1] - c, 0)
    pb = -min(img.shape[0] - d, 0)
    a, b, c, d = max(a, 0), max(b, 0), min(c, img.shape[1]), min(d, img.shape[0])
    submatrix = img[b:d, a:c].permute(2, 0, 1)
    pad = tv.transforms.Pad((pl, pt, pr, pb))
    submatrix = pad(submatrix).permute(1, 2, 0)
    return submatrix
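
# Example: a box reaching past the top-left corner is clamped to the image and
# the missing region is zero-padded, so the output always has the requested
# (d - b) x (c - a) size:
#
#   sub = get_submatrix_with_padding(img, -10, -10, 100, 100)  # 110x110 crop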
class Face:
    def __init__(self, img, a, b, c, d, scale) -> None:
        self.img = img
        self.scale = scale
        lmk = None
        best_score = 0
        i = 0
        crop = get_submatrix_with_padding(self.img, a, b, c, d)
        # Try all four 90-degree rotations and keep the landmark set with the
        # highest mean confidence, which handles rotated faces.
        for curr_i in range(4):
            rcrop, s, ph, pw = resize(crop.rot90(curr_i), 256)
            rcrop = (rcrop[None] / 255).permute(0, 3, 1, 2).type(torch.float32).numpy()
            curr_lmk = Models.lmk(rcrop)
            score = np.mean(curr_lmk[0, :, 2])
            if score > best_score:
                best_score = score
                lmk = curr_lmk
                i = curr_i
        self.bbox = (a, b, c, d)
        self.confidence = best_score
        # Reduce the 68 landmarks to 5 ArcFace keypoints (eye centers, nose
        # tip, mouth corners); `* 4 * s` maps coordinates from the model's
        # quarter-resolution output back to the original crop size.
        self.kps = np.vstack([
            lmk[0, [37, 38, 40, 41], :2].mean(axis=0),
            lmk[0, [43, 44, 46, 47], :2].mean(axis=0),
            lmk[0, [30, 48, 54], :2]
        ]) * 4 * s
        self.T2 = np.array([[1, 0, -a], [0, 1, -b], [0, 0, 1]])
        rot = cv2.getRotationMatrix2D((128 * s, 128 * s), 90 * i, 1)
        self.R = np.vstack((rot, np.array((0, 0, 1))))

    def crop(self, size, crop_factor):
        # Compose: translate into bbox space (T2), undo the rotation (R),
        # align to the ArcFace template (M), then zoom out by crop_factor
        # about the crop center (T4 @ S @ T3).
        S = np.array([[1 / crop_factor, 0, 0], [0, 1 / crop_factor, 0], [0, 0, 1]])
        M = estimate_norm(self.kps, size)
        N = M @ self.R @ self.T2
        cx, cy = np.array((size / 2, size / 2, 1)) @ cv2.invertAffineTransform(M @ self.R @ self.T2).T
        T3 = np.array([[1, 0, -cx], [0, 1, -cy], [0, 0, 1]])
        T4 = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
        N = N @ T4 @ S @ T3
        crop = cv2.warpAffine(self.img.numpy(), N, (size, size))
        crop = torch.from_numpy(crop)[None]
        return N, crop
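
# Round trip used by the nodes above: Face.crop returns the 2x3 warp N along
# with the aligned crop, and inverting N maps (edited) crop pixels back into
# the original frame, which is exactly what WarpFaceBack does. E.g., with an
# existing `face`:
#
#   N, crop = face.crop(512, 1.5)
#   restored = cv2.warpAffine(crop[0].numpy(), cv2.invertAffineTransform(N),
#                             face.img.shape[1::-1])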
def detect_faces(img, threshold, dImg, scale):
    img = pad_to_stride(img, stride=32)
    # TODO: the padding may be incorrect now that detection can run on a
    # downscaled copy.
    dImg = pad_to_stride(dImg, stride=32)
    delta = time.time()
    dets = Models.yolo((dImg[None] / 255).permute(0, 3, 1, 2), threshold)
    delta = time.time() - delta
    print("[?] YOLO ran in %s" % delta)
    boxes = (dets.boxes.xyxy.reshape(-1, 2, 2)).reshape(-1, 4)
    faces = []
    delta = time.time()
    for (a, b, c, d), box in zip(boxes.type(torch.int).cpu().numpy(), dets.boxes):
        # Expand each detection to a square around its center, then map the
        # coordinates back to the full-resolution image.
        cx, cy = (a + c) / 2, (b + d) / 2
        r = np.sqrt((c - a) ** 2 + (d - b) ** 2) / 2
        a, b, c, d = [int(x * (1.0 / scale)) for x in (cx - r, cy - r, cx + r, cy + r)]
        face = Face(img, a, b, c, d, scale)
        faces.append(face)
    delta = time.time() - delta
    print("[?] Alignment took %s" % delta)
    return faces
def get_face_mesh(crop: torch.Tensor):
    # Run MediaPipe face mesh and return the landmark set closest to the
    # crop center, or None if no face was found.
    with mp.solutions.face_mesh.FaceMesh(max_num_faces=10) as face_mesh:
        mesh = face_mesh.process(crop.mul(255).type(torch.uint8)[0].numpy())
        _, h, w, _ = crop.shape
        if mesh.multi_face_landmarks is not None:
            all_pts = np.array([np.array([(w * l.x, h * l.y) for l in lmks.landmark]) for lmks in mesh.multi_face_landmarks], dtype=np.int32)
            idx = np.argmin(np.abs(all_pts - np.array([w / 2, h / 2])).sum(axis=(1, 2)))
            points = all_pts[idx]
            return points
        else:
            return None
def mask_simple_square(face, M, crop):
    # Rotated bbox and size.
    h, w = crop.shape[1:3]
    a, b, c, d = face.bbox
    rect = np.array([
        [a, b, 1],
        [a, d, 1],
        [c, b, 1],
        [c, d, 1],
    ]) @ M.T
    lx, ly = [int(x) for x in np.min(rect, axis=0)]
    hx, hy = [int(x) for x in np.max(rect, axis=0)]
    mask = np.zeros((h, w), dtype=np.float32)
    mask = cv2.rectangle(mask, (lx, ly), (hx, hy), 1, -1)
    mask = torch.from_numpy(mask)[None]
    return mask
def mask_convex_hull(face, M, crop):
    h, w = crop.shape[1:3]
    points = get_face_mesh(crop)
    if points is None:
        # Fall back to the bbox mask when no mesh was found.
        return mask_simple_square(face, M, crop)
    hull = ConvexHull(points)
    mask = np.zeros((h, w), dtype=np.int32)
    cv2.fillPoly(mask, [points[hull.vertices, :]], color=1)
    mask = mask.astype(np.float32)
    mask = torch.from_numpy(mask[None])
    return mask
def mask_BiSeNet(crop,
                 skin=True,
                 l_brow=True,
                 r_brow=True,
                 l_eye=True,
                 r_eye=True,
                 eye_g=True,
                 l_ear=True,
                 r_ear=True,
                 ear_r=True,
                 nose=True,
                 mouth=True,
                 u_lip=True,
                 l_lip=True,
                 neck=False,
                 neck_l=False,
                 cloth=False,
                 hair=False,
                 hat=False,
                 ):
    with torch.no_grad():
        # Note: the BiSeNet weights are reloaded on every call.
        bisenet = BiSeNet(n_classes=19)
        bisenet.cuda()
        model_path = os.path.join(models_dir, 'bisenet', '79999_iter.pth')
        bisenet.load_state_dict(torch.load(model_path))
        bisenet.eval()
        crop_t = crop.permute(0, 3, 1, 2).cuda().float()
        segms_t = bisenet(crop_t)[0].argmax(1).float()
        dic = {
            'skin': 1,
            'l_brow': 2,
            'r_brow': 3,
            'l_eye': 4,
            'r_eye': 5,
            'eye_g': 6,
            'l_ear': 7,
            'r_ear': 8,
            'ear_r': 9,
            'nose': 10,
            'mouth': 11,
            'u_lip': 12,
            'l_lip': 13,
            'neck': 14,
            'neck_l': 15,
            'cloth': 16,
            'hair': 17,
            'hat': 18,
        }
        # The keyword arguments mirror the keys of `dic`, so scanning locals()
        # collects the class ids of every part the caller enabled.
        keep = []
        for k, v in locals().items():
            if k in dic and v:
                keep.append(dic[k])
        face_part_ids = torch.tensor(keep).cuda()
        # Union of the selected classes as a float mask.
        segms_t = torch.sum(segms_t.repeat(len(face_part_ids), 1, 1, 1) == face_part_ids[..., None, None, None], axis=0).float()
        mask = segms_t.cpu()
    return mask
def mask_jonathandinu(crop, skin=True, nose=True, eye_g=True, l_eye=True, r_eye=True, l_brow=True, r_brow=True,
                      l_ear=True, r_ear=True, mouth=True, u_lip=True, l_lip=True,
                      hair=False, hat=False, ear_r=False, neck_l=False, neck=False, cloth=False):
    global jonathandinu_image_processor, jonathandinu_model
    # Prefer CUDA (NVIDIA/AMD), then MPS (Apple Silicon), then CPU.
    device = (
        "cuda"
        if torch.cuda.is_available()
        else "mps"
        if torch.backends.mps.is_available()
        else "cpu"
    )
    # Load the Segformer face-parsing model once and cache it globally.
    if 'jonathandinu_image_processor' not in globals():
        jonathandinu_image_processor = SegformerImageProcessor.from_pretrained("jonathandinu/face-parsing")
        jonathandinu_model = SegformerForSemanticSegmentation.from_pretrained("jonathandinu/face-parsing")
        jonathandinu_model.to(device)
    inputs = jonathandinu_image_processor(images=crop.mul(255).type(torch.uint8), return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = jonathandinu_model(**inputs)
    logits = outputs.logits  # shape (batch_size, num_labels, ~height/4, ~width/4)
    # Resize the output to match the input image dimensions.
    upsampled_logits = tv.transforms.functional.resize(logits, crop.shape[1:3], antialias=True)
    labels = upsampled_logits.argmax(dim=1)
    ids = {
        'skin': 1,
        'nose': 2,
        'eye_g': 3,
        'l_eye': 4,
        'r_eye': 5,
        'l_brow': 6,
        'r_brow': 7,
        'l_ear': 8,
        'r_ear': 9,
        'mouth': 10,
        'u_lip': 11,
        'l_lip': 12,
        'hair': 13,
        'hat': 14,
        'ear_r': 15,
        'neck_l': 16,
        'neck': 17,
        'cloth': 18,
    }
    # Same locals() trick as mask_BiSeNet: collect the ids of enabled parts.
    keep = []
    for k, v in locals().items():
        if k in ids and v:
            keep.append(ids[k])
    # Use the selected device; a hard-coded .cuda() would break on MPS/CPU.
    face_part_ids = torch.tensor(keep).to(device)
    mask = torch.sum(labels.repeat(len(face_part_ids), 1, 1, 1) == face_part_ids[..., None, None, None], axis=0).float().cpu()
    return mask
mask_types = [
    'simple_square',
    'convex_hull',
    'BiSeNet',
    'jonathandinu',
    # 'clean BiSeNet',
]

mask_funs = {
    'simple_square': mask_simple_square,
    'convex_hull': mask_convex_hull,
    'BiSeNet': lambda face, M, crop: mask_BiSeNet(crop),
    'jonathandinu': lambda face, M, crop: mask_jonathandinu(crop),
    # 'clean BiSeNet': mask_clean_BiSeNet,
}

def mask_crop(face, M, crop, mask_type):
    mask = mask_funs[mask_type](face, M, crop)
    return mask