Star
You must be signed in to star a gist
Crop faces from pictures using the face_recognition library and PIL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import numpy as np | |
import cv2 | |
import mrcnn.config | |
import mrcnn.utils | |
from mrcnn.model import MaskRCNN | |
from pathlib import Path | |
# Configuration that will be used by the Mask-RCNN library | |
class MaskRCNNConfig(mrcnn.config.Config): | |
NAME = "coco_pretrained_model_config" | |
IMAGES_PER_GPU = 1 | |
GPU_COUNT = 1 | |
NUM_CLASSES = 1 + 80 # COCO dataset has 80 classes + one background class | |
DETECTION_MIN_CONFIDENCE = 0.6 | |
# Filter a list of Mask R-CNN detection results to get only the detected cars / trucks | |
def get_car_boxes(boxes, class_ids):
    """Return only the bounding boxes detected as vehicles.

    Keeps boxes whose class id is 3, 6 or 8 (car / bus / truck in the
    COCO label set used by this model) and returns them as a numpy array.
    """
    vehicle_class_ids = {3, 8, 6}
    vehicle_boxes = [
        box
        for cls, box in zip(class_ids, boxes)
        if cls in vehicle_class_ids
    ]
    return np.array(vehicle_boxes)
# Root directory of the project | |
# Root directory of the project
ROOT_DIR = Path(".")

# Directory to save logs and trained model (required by the MaskRCNN constructor)
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")

# Download COCO trained weights from Releases if needed (network side effect)
if not os.path.exists(COCO_MODEL_PATH):
    mrcnn.utils.download_trained_weights(COCO_MODEL_PATH)

# Directory of images to run detection on
IMAGE_DIR = os.path.join(ROOT_DIR, "images")

# Video file or camera to process - set this to 0 to use your webcam instead of a video file
VIDEO_SOURCE = "test_images/parking.mp4"

# Create a Mask-RCNN model in inference mode
model = MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=MaskRCNNConfig())

# Load pre-trained model (by_name=True matches weight tensors to layers by layer name)
model.load_weights(COCO_MODEL_PATH, by_name=True)

# Location of parking spaces
# NOTE(review): this is never referenced again in the visible script - confirm
# whether it is a leftover from a larger parking-spot-detection example.
parked_car_boxes = None

# Load the video file we want to run detection on
video_capture = cv2.VideoCapture(VIDEO_SOURCE)
# Loop over each frame of video | |
# Loop over each frame of video
while video_capture.isOpened():
    success, frame = video_capture.read()
    # Stop when the video ends (or a frame can't be read)
    if not success:
        break

    # Convert the image from BGR color (which OpenCV uses) to RGB color
    # by reversing the channel axis (returns a view, no copy)
    rgb_image = frame[:, :, ::-1]

    # Run the image through the Mask R-CNN model to get results.
    results = model.detect([rgb_image], verbose=0)

    # Mask R-CNN assumes we are running detection on multiple images.
    # We only passed in one image to detect, so only grab the first result.
    r = results[0]

    # The r variable will now have the results of detection:
    # - r['rois'] are the bounding box of each detected object
    # - r['class_ids'] are the class id (type) of each detected object
    # - r['scores'] are the confidence scores for each detection
    # - r['masks'] are the object masks for each detected object (which gives you the object outline)

    # Filter the results to only grab the car / truck bounding boxes
    car_boxes = get_car_boxes(r['rois'], r['class_ids'])

    print("Cars found in frame of video:")

    # Draw each box on the frame
    for box in car_boxes:
        print("Car: ", box)
        # Mask R-CNN boxes are ordered (y1, x1, y2, x2)
        y1, x1, y2, x2 = box
        # Draw the box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)

    # Show the frame of video on the screen
    cv2.imshow('Video', frame)

    # Hit 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Clean up everything when finished
video_capture.release()
cv2.destroyAllWindows()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import face_recognition | |
import cv2 | |
# This code finds all faces in a list of images using the CNN model. | |
# | |
# This demo is for the _special case_ when you need to find faces in LOTS of images very quickly and all the images | |
# are the exact same size. This is common in video processing applications where you have lots of video frames | |
# to process. | |
# | |
# If you are processing a lot of images and using a GPU with CUDA, batch processing can be ~3x faster then processing | |
# single images at a time. But if you aren't using a GPU, then batch processing isn't going to be very helpful. | |
# | |
# PLEASE NOTE: This example requires OpenCV (the `cv2` library) to be installed only to read the video file. | |
# OpenCV is *not* required to use the face_recognition library. It's only required if you want to run this | |
# specific demo. If you have trouble installing it, try any of the other demos that don't require it instead. | |
# Open video file | |
# Open video file
video_capture = cv2.VideoCapture("short_hamilton_clip.mp4")

frames = []       # buffer of RGB frames awaiting batch processing
frame_count = 0   # total frames read so far (used to compute absolute frame numbers)

while video_capture.isOpened():
    # Grab a single frame of video
    ret, frame = video_capture.read()

    # Bail out when the video file ends
    if not ret:
        break

    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
    frame = frame[:, :, ::-1]

    # Save each frame of the video to a list
    frame_count += 1
    frames.append(frame)

    # Every 128 frames (the default batch size), batch process the list of frames to find faces
    # NOTE(review): any leftover frames after the last full batch (frame_count % 128)
    # are never processed - confirm that dropping the tail is acceptable.
    if len(frames) == 128:
        batch_of_face_locations = face_recognition.batch_face_locations(frames, number_of_times_to_upsample=0)

        # Now let's list all the faces we found in all 128 frames
        for frame_number_in_batch, face_locations in enumerate(batch_of_face_locations):
            number_of_faces_in_frame = len(face_locations)

            # Convert the batch-relative index back to an absolute frame number
            frame_number = frame_count - 128 + frame_number_in_batch
            print("I found {} face(s) in frame #{}.".format(number_of_faces_in_frame, frame_number))

            for face_location in face_locations:
                # Print the location of each face in this frame
                # face_recognition reports (top, right, bottom, left) pixel coordinates
                top, right, bottom, left = face_location
                print(" - A face is located at pixel location Top: {}, Left: {}, Bottom: {}, Right: {}".format(top, left, bottom, right))

        # Clear the frames array to start the next batch
        frames = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fnmatch
import glob
import os
import shutil
import time

import face_recognition
import redis
from PIL import Image
# Source image for the single-image pass below
inputImg = "/Users/antigen/Downloads/2011-volvo-s60_100323431_h.jpg"


def _save_face_crops(image_path, output_dir):
    """Crop every face found in *image_path* and save each as a PNG.

    Files are named extracted_face_<timestamp>_<i>.png where <i> is the
    index of the face within the image. Requires `import glob` users to
    pass a real file path; no-op if no faces are found.
    """
    image = face_recognition.load_image_file(image_path)
    faces = face_recognition.face_locations(image)

    for i, (top, right, bottom, left) in enumerate(faces):
        # Crop the face region out of the numpy image array
        face_image = image[top:bottom, left:right]
        final = Image.fromarray(face_image)

        timestamp = time.strftime('%Y_%b_%d_%H%M', time.localtime())
        file_name = "extracted_face_" + timestamp
        final.save(os.path.join(output_dir, "%s_%s.png" % (file_name, i)), "PNG")


# Batch pass: crop faces from every .JPG in the current directory into ./images
# (this section was pasted from an IPython session; the "...:" prompt markers
# made it invalid Python and have been removed).
for file_name in glob.glob("./*.JPG"):
    _save_face_crops(file_name, "./images")

# Single-image pass: crop faces from one specific photo into the cwd
_save_face_crops(inputImg, ".")
import msgpack | |
def process_face_encodings_return_messagepack(image_name):
    """Encode the first face found in *image_name* as MessagePack bytes."""
    loaded = face_recognition.load_image_file(image_name)
    # face_encodings returns one 128-d encoding per detected face; take the first
    first_encoding = face_recognition.face_encodings(loaded)[0]
    return msgpack.packb(first_encoding.tolist(), use_bin_type=True)
def process_messagepack_return_face_encodings(key_value):
    """Unpack MessagePack bytes back into the original face-encoding list."""
    decoded = msgpack.unpackb(key_value, raw=False)
    return decoded
import numpy as np | |
def convert_list_to_numpy_array(new_list):
    """Return *new_list* as a numpy array."""
    as_array = np.asarray(new_list)
    return as_array
# Dump every key/value pair from redis db 9 into a local dict.
client = redis.StrictRedis(db=9, decode_responses=True)

data = {}
cursor = '0'
while cursor != 0:
    # SCAN returns the next cursor (0 once iteration is complete) plus a
    # batch of keys; batches may be empty on a sparse keyspace.
    cursor, keys = client.scan(cursor=cursor, count=1000000)
    if not keys:
        # mget(*keys) with no arguments raises, so skip empty batches
        continue
    values = client.mget(*keys)
    # BUG FIX: the original filtered out None values *before* zipping with
    # keys, which shifted every later value onto the wrong key. Pair keys
    # with values first, then skip entries whose value is missing.
    for key, value in zip(keys, values):
        if value is not None:
            data[key] = value
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import face_recognition | |
import cv2 | |
from datetime import datetime, timedelta | |
import numpy as np | |
import platform | |
import pickle | |
# Our list of known face encodings and a matching list of metadata about each face.
# The two lists are kept in lockstep: known_face_metadata[i] describes the
# person whose encoding is known_face_encodings[i].
known_face_encodings = []  # each encoding is a list
known_face_metadata = []
def save_known_faces():
    """Pickle the known-face encodings and their metadata to known_faces.dat.

    Writes the two module-level lists as a single pickled pair so
    load_known_faces() can restore them on the next run.
    """
    with open("known_faces.dat", "wb") as face_data_file:
        face_data = [known_face_encodings, known_face_metadata]
        pickle.dump(face_data, face_data_file)
        print("Known faces backed up to disk.")
        # Removed leftover debug print(known_face_encodings): it dumped every
        # 128-float encoding to stdout on each save, flooding the console.
def load_known_faces():
    """Load known-face data from disk, or start fresh if no backup exists.

    Replaces the module-level known_face_encodings / known_face_metadata
    lists with the pickled pair saved by save_known_faces().
    """
    global known_face_encodings, known_face_metadata

    try:
        with open("known_faces.dat", "rb") as face_data_file:
            known_face_encodings, known_face_metadata = pickle.load(face_data_file)
            print("Known faces loaded from disk.")
    # Missing file is the normal first-run case, not an error.
    # (Removed the unused `as e` binding and the redundant `pass`.)
    except FileNotFoundError:
        print("No previous face data found - starting with a blank known face list.")
def running_on_jetson_nano():
    """Report whether this process is running on a Jetson Nano.

    The same code runs on a laptop or a Nano; the camera must be opened
    differently on the Nano, so we detect it via the CPU architecture.
    A normal Intel laptop reports "x86_64", the Nano reports "aarch64".
    """
    machine_type = platform.machine()
    return machine_type == "aarch64"
def get_jetson_gstreamer_source(capture_width=1280, capture_height=720, display_width=1280, display_height=720, framerate=60, flip_method=0):
    """
    Return an OpenCV-compatible video source description that uses gstreamer to capture video from the camera on a Jetson Nano
    """
    # Build the pipeline stage by stage, then join into one source string.
    stages = (
        'nvarguscamerasrc ! video/x-raw(memory:NVMM), ',
        f'width=(int){capture_width}, height=(int){capture_height}, ',
        f'format=(string)NV12, framerate=(fraction){framerate}/1 ! ',
        f'nvvidconv flip-method={flip_method} ! ',
        f'video/x-raw, width=(int){display_width}, height=(int){display_height}, format=(string)BGRx ! ',
        'videoconvert ! video/x-raw, format=(string)BGR ! appsink',
    )
    return ''.join(stages)
def register_new_face(face_encoding, face_image):
    """
    Add a new person to our list of known faces
    """
    # Add the face encoding to the list of known faces
    known_face_encodings.append(face_encoding)

    # Use a single timestamp for all three time fields so they are exactly
    # equal at registration (the original called datetime.now() three times,
    # which could yield microsecond-different values for the same event).
    now = datetime.now()

    # Add a matching dictionary entry to our metadata list.
    # We can use this to keep track of how many times a person has visited, when we last saw them, etc.
    known_face_metadata.append({
        "first_seen": now,
        "first_seen_this_interaction": now,
        "last_seen": now,
        "seen_count": 1,
        "seen_frames": 1,
        "face_image": face_image,
    })
def lookup_known_face(face_encoding):
    """
    See if this is a face we already have in our face list
    """
    # With no known faces yet there is nothing to match against.
    if not known_face_encodings:
        return None

    # Distance from the unknown face to every known face: one float in
    # [0.0, 1.0] per known face - the smaller, the more similar.
    face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)

    # Index of the most similar known face.
    best_match_index = np.argmin(face_distances)

    # The recognition model was trained so that pictures of the same person
    # are less than 0.6 apart. We loosen that slightly to 0.65 because it is
    # unlikely two very similar people come to the door at the same time.
    if face_distances[best_match_index] >= 0.65:
        return None

    # Matched: fetch the stored metadata and record this sighting.
    metadata = known_face_metadata[best_match_index]
    metadata["last_seen"] = datetime.now()
    metadata["seen_frames"] += 1

    # Sightings within 5 minutes of the current interaction count as the
    # same visit; after a longer gap, bump the visit counter and restart
    # the interaction clock.
    if datetime.now() - metadata["first_seen_this_interaction"] > timedelta(minutes=5):
        metadata["first_seen_this_interaction"] = datetime.now()
        metadata["seen_count"] += 1

    return metadata
def main_loop():
    """Capture video, recognize faces, draw labels, and periodically persist known faces."""
    # Get access to the webcam. The method is different depending on if this is running on a laptop or a Jetson Nano.
    if running_on_jetson_nano():
        # Accessing the camera with OpenCV on a Jetson Nano requires gstreamer with a custom gstreamer source string
        video_capture = cv2.VideoCapture(get_jetson_gstreamer_source(), cv2.CAP_GSTREAMER)
    else:
        # Accessing the camera with OpenCV on a laptop just requires passing in the number of the webcam (usually 0)
        # Note: You can pass in a filename instead if you want to process a video file instead of a live camera stream
        video_capture = cv2.VideoCapture(0)

    # Track how long since we last saved a copy of our known faces to disk as a backup.
    number_of_faces_since_save = 0

    while True:
        # Grab a single frame of video
        # NOTE(review): `ret` is never checked - if the read fails, `frame` is
        # None and cv2.resize below will raise. Confirm the capture source is
        # reliable or add a guard.
        ret, frame = video_capture.read()

        # Resize frame of video to 1/4 size for faster face recognition processing
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
        rgb_small_frame = small_frame[:, :, ::-1]

        # Find all the face locations and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

        # Loop through each detected face and see if it is one we have seen before
        # If so, we'll give it a label that we'll draw on top of the video.
        face_labels = []
        for face_location, face_encoding in zip(face_locations, face_encodings):
            # See if this face is in our list of known faces.
            metadata = lookup_known_face(face_encoding)

            # If we found the face, label the face with some useful information.
            if metadata is not None:
                time_at_door = datetime.now() - metadata['first_seen_this_interaction']
                face_label = f"At door {int(time_at_door.total_seconds())}s"

            # If this is a brand new face, add it to our list of known faces
            else:
                face_label = "New visitor!"

                # Grab the image of the the face from the current frame of video
                top, right, bottom, left = face_location
                face_image = small_frame[top:bottom, left:right]
                face_image = cv2.resize(face_image, (150, 150))

                # Add the new face to our known face data
                register_new_face(face_encoding, face_image)

            face_labels.append(face_label)

        # Draw a box around each face and label each face
        for (top, right, bottom, left), face_label in zip(face_locations, face_labels):
            # Scale back up face locations since the frame we detected in was scaled to 1/4 size
            top *= 4
            right *= 4
            bottom *= 4
            left *= 4

            # Draw a box around the face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

            # Draw a label with a name below the face
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
            cv2.putText(frame, face_label, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)

        # Display recent visitor images
        number_of_recent_visitors = 0
        for metadata in known_face_metadata:
            # If we have seen this person in the last minute, draw their image
            if datetime.now() - metadata["last_seen"] < timedelta(seconds=10) and metadata["seen_frames"] > 5:
                # Draw the known face image
                x_position = number_of_recent_visitors * 150
                frame[30:180, x_position:x_position + 150] = metadata["face_image"]
                number_of_recent_visitors += 1

                # Label the image with how many times they have visited
                visits = metadata['seen_count']
                visit_label = f"{visits} visits"
                if visits == 1:
                    visit_label = "First visit"
                cv2.putText(frame, visit_label, (x_position + 10, 170), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1)

        if number_of_recent_visitors > 0:
            cv2.putText(frame, "Visitors at Door", (5, 18), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)

        # Display the final frame of video with boxes drawn around each detected fames
        cv2.imshow('Video', frame)

        # Hit 'q' on the keyboard to quit!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            save_known_faces()
            break

        # We need to save our known faces back to disk every so often in case something crashes.
        if len(face_locations) > 0 and number_of_faces_since_save > 100:
            save_known_faces()
            number_of_faces_since_save = 0
        else:
            number_of_faces_since_save += 1

    # Release handle to the webcam
    video_capture.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    # Restore any previously-saved faces, then run the camera loop until 'q'.
    load_known_faces()
    main_loop()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
from PIL import Image | |
import face_recognition | |
import time, os, fnmatch, shutil | |
import numpy as np | |
# get a list of whole blob | |
# keep a list array or pandas loaded from hdf5 | |
# Build the gallery of known face encodings from every .png in the cwd.
# (Removed the unused `known_images` list from the original.)
known_faces = []
for file_name in glob.glob("./*.png"):
    image = face_recognition.load_image_file(file_name)
    known_face_encoding = face_recognition.face_encodings(image)
    # Skip images in which no face was detected.
    if known_face_encoding:
        known_faces.append(np.array(known_face_encoding[0]))

image_to_test = face_recognition.load_image_file("extracted_face_2019_Jun_28_2153_1.png")
image_to_test_encoding = face_recognition.face_encodings(image_to_test)

if image_to_test_encoding:
    # BUG FIX: pass the single 128-d encoding (index [0]) rather than the
    # whole list of encodings - face_distance compares one unknown face
    # against the gallery, not a 2-D array of faces.
    face_distances = face_recognition.face_distance(known_faces, np.array(image_to_test_encoding[0]))

    for i, face_distance in enumerate(face_distances):
        print("The test image has a distance of {:.2} from known image #{}".format(face_distance, i))
        print("- With a normal cutoff of 0.6, would the test image match the known image? {}".format(face_distance < 0.6))
        # BUG FIX: the "very strict" line repeated the 0.6 cutoff; use 0.5 so
        # the message and the comparison actually describe a stricter test.
        print("- With a very strict cutoff of 0.5, would the test image match the known image? {}".format(face_distance < 0.5))
        print("**************************************************************")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment