Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
# Crop faces from pictures using the face_recognition library and PIL
import os
import numpy as np
import cv2
import mrcnn.config
import mrcnn.utils
from mrcnn.model import MaskRCNN
from pathlib import Path
# Configuration that will be used by the Mask-RCNN library
class MaskRCNNConfig(mrcnn.config.Config):
    """Inference configuration for the COCO-pretrained Mask R-CNN model."""

    NAME = "coco_pretrained_model_config"
    # The detection loop passes exactly one frame per call to model.detect(),
    # so the effective batch size (GPU_COUNT * IMAGES_PER_GPU) must be 1.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = 1 + 80  # COCO dataset has 80 classes + one background class
# Filter a list of Mask R-CNN detection results to get only the detected cars / trucks
def get_car_boxes(boxes, class_ids):
    """Return only the bounding boxes that belong to detected vehicles.

    Args:
        boxes: Sequence of bounding boxes, one per detection.
        class_ids: Sequence of class ids, aligned index-for-index with
            ``boxes``.

    Returns:
        A numpy array holding the boxes whose class id is 3, 8 or 6
        (car, truck and bus in the COCO label set). The array is empty
        when nothing matches.
    """
    vehicle_class_ids = (3, 8, 6)
    car_boxes = [
        box
        for box, class_id in zip(boxes, class_ids)
        # If the detected object isn't a car / truck / bus, skip it
        if class_id in vehicle_class_ids
    ]
    return np.array(car_boxes)
# Root directory of the project
ROOT_DIR = Path(".")
# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")
# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    mrcnn.utils.download_trained_weights(COCO_MODEL_PATH)
# Directory of images to run detection on
IMAGE_DIR = os.path.join(ROOT_DIR, "images")
# Video file or camera to process - set this to 0 to use your webcam instead of a video file
VIDEO_SOURCE = "test_images/parking.mp4"
# Create a Mask-RCNN model in inference mode
model = MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=MaskRCNNConfig())
# Load pre-trained model
model.load_weights(COCO_MODEL_PATH, by_name=True)
# Location of parking spaces
parked_car_boxes = None
# Load the video file we want to run detection on
video_capture = cv2.VideoCapture(VIDEO_SOURCE)
# Loop over each frame of video
while video_capture.isOpened():
    success, frame = video_capture.read()
    if not success:
        # End of the video (or the source stopped delivering frames)
        break
    # Convert the image from BGR color (which OpenCV uses) to RGB color
    rgb_image = frame[:, :, ::-1]
    # Run the image through the Mask R-CNN model to get results.
    results = model.detect([rgb_image], verbose=0)
    # Mask R-CNN assumes we are running detection on multiple images.
    # We only passed in one image to detect, so only grab the first result.
    r = results[0]
    # The r variable will now have the results of detection:
    # - r['rois'] are the bounding box of each detected object
    # - r['class_ids'] are the class id (type) of each detected object
    # - r['scores'] are the confidence scores for each detection
    # - r['masks'] are the object masks for each detected object (which gives you the object outline)
    # Filter the results to only grab the car / truck bounding boxes
    car_boxes = get_car_boxes(r['rois'], r['class_ids'])
    print("Cars found in frame of video:")
    # Draw each box on the frame
    for box in car_boxes:
        print("Car: ", box)
        # Plain ints keep OpenCV's drawing functions happy regardless of the array dtype
        y1, x1, y2, x2 = (int(coordinate) for coordinate in box)
        # Draw the box
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1)
    # Show the frame of video on the screen
    cv2.imshow('Video', frame)
    # Hit 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# Clean up everything when finished
video_capture.release()
cv2.destroyAllWindows()
import face_recognition
import cv2
# This code finds all faces in a list of images using the CNN model.
# This demo is for the _special case_ when you need to find faces in LOTS of images very quickly and all the images
# are the exact same size. This is common in video processing applications where you have lots of video frames
# to process.
# If you are processing a lot of images and using a GPU with CUDA, batch processing can be ~3x faster than processing
# single images at a time. But if you aren't using a GPU, then batch processing isn't going to be very helpful.
# PLEASE NOTE: This example requires OpenCV (the `cv2` library) to be installed only to read the video file.
# OpenCV is *not* required to use the face_recognition library. It's only required if you want to run this
# specific demo. If you have trouble installing it, try any of the other demos that don't require it instead.
# Number of frames handed to the CNN face detector in one call (the library's default batch size)
BATCH_SIZE = 128


def _report_faces_in_batch(batch_frames, first_frame_number):
    """Detect faces in a batch of RGB frames and print every location found.

    Args:
        batch_frames: List of same-sized RGB frames.
        first_frame_number: Zero-based video frame index of ``batch_frames[0]``,
            used so printed frame numbers refer to the whole video.
    """
    batch_of_face_locations = face_recognition.batch_face_locations(batch_frames, number_of_times_to_upsample=0)
    # Now let's list all the faces we found in all frames of the batch
    for frame_number_in_batch, face_locations in enumerate(batch_of_face_locations):
        number_of_faces_in_frame = len(face_locations)
        frame_number = first_frame_number + frame_number_in_batch
        print("I found {} face(s) in frame #{}.".format(number_of_faces_in_frame, frame_number))
        for face_location in face_locations:
            # Print the location of each face in this frame
            top, right, bottom, left = face_location
            print(" - A face is located at pixel location Top: {}, Left: {}, Bottom: {}, Right: {}".format(top, left, bottom, right))


# Open video file
video_capture = cv2.VideoCapture("short_hamilton_clip.mp4")
frames = []
frame_count = 0
while video_capture.isOpened():
    # Grab a single frame of video
    ret, frame = video_capture.read()
    # Bail out when the video file ends
    if not ret:
        break
    # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses).
    # cvtColor returns a contiguous array, unlike a reversed-channel view of the frame.
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Save each frame of the video to a list
    frame_count += 1
    frames.append(frame)
    # Every 128 frames (the default batch size), batch process the list of frames to find faces
    if len(frames) == BATCH_SIZE:
        _report_faces_in_batch(frames, frame_count - BATCH_SIZE)
        # Clear the frames array to start the next batch
        frames = []

# Process whatever is left over when the video length is not a multiple of the batch size
if frames:
    _report_faces_in_batch(frames, frame_count - len(frames))
    frames = []

video_capture.release()
from PIL import Image
import face_recognition
import time, os, fnmatch, shutil
import redis
import glob

inputImg = "/Users/antigen/Downloads/2011-volvo-s60_100323431_h.jpg"

# Every cropped face is written here; create it up front so save() cannot fail on a missing directory
os.makedirs("./images", exist_ok=True)

# Crop every face out of every JPG in the current directory.
# NOTE(review): output names only contain a minute-resolution timestamp and the
# per-image face index, so faces from different source images processed within
# the same minute overwrite each other - confirm whether that is acceptable.
for file_name in glob.glob("./*.JPG"):
    image = face_recognition.load_image_file(file_name)
    faces = face_recognition.face_locations(image)
    for i, (top, right, bottom, left) in enumerate(faces):
        faceImage = image[top:bottom, left:right]
        final = Image.fromarray(faceImage)
        t = time.localtime()
        timestamp = time.strftime('%Y_%b_%d_%H%M', t)
        FILE_NAME = ("extracted_face_" + timestamp)
        final.save("./images" + "/" + FILE_NAME + "_%s.png" % (str(i)), "PNG")

# Crop every face out of the single hard-coded input image
image = face_recognition.load_image_file(inputImg)
faces = face_recognition.face_locations(image)
for i, (top, right, bottom, left) in enumerate(faces):
    faceImage = image[top:bottom, left:right]
    final = Image.fromarray(faceImage)
    t = time.localtime()
    timestamp = time.strftime('%Y_%b_%d_%H%M', t)
    FILE_NAME = ("extracted_face_" + timestamp)
    final.save("./images" + "/" + FILE_NAME + "_%s.png" % (str(i)), "PNG")
import msgpack
def process_face_encodings_return_messagepack(image_name):
    """Encode the first face found in an image and pack it with msgpack.

    Args:
        image_name: Path (or file object) of the image to load.

    Returns:
        The msgpack-serialized face encoding (a list of floats) as bytes.

    Raises:
        IndexError: If no face could be encoded in the image.
    """
    loaded_image = face_recognition.load_image_file(image_name)
    encodings = face_recognition.face_encodings(loaded_image)
    if not encodings:
        # Same exception type a bare [0] would raise, but with a useful message
        raise IndexError("no face found in image {!r}".format(image_name))
    loaded_face_encoding = encodings[0].tolist()
    return msgpack.packb(loaded_face_encoding, use_bin_type=True)
def process_messagepack_return_face_encodings(key_value):
    """Unpack a msgpack-serialized face encoding.

    Args:
        key_value: The msgpack payload to decode.

    Returns:
        The decoded Python object (strings are decoded to ``str`` because
        ``raw=False``).
    """
    return msgpack.unpackb(key_value, raw=False)
import numpy as np
def convert_list_to_numpy_array(new_list):
    """Return ``new_list`` as a numpy array.

    An input that is already a matching ndarray is returned without copying,
    because ``np.asarray`` is used.
    """
    return np.asarray(new_list)
# Dump every key/value pair of redis database 9 into the in-memory ``data`` dict
client = redis.StrictRedis(db=9, decode_responses=True)
data = {}
cursor = '0'
# SCAN reports a cursor of 0 once the whole keyspace has been walked
while cursor != 0:
    cursor, keys = client.scan(cursor=cursor, count=1000000)
    if not keys:
        # An empty page is legal mid-scan, and MGET requires at least one key
        continue
    values = client.mget(*keys)
    # Pair keys with values BEFORE dropping missing entries; filtering the
    # values first would shift every later value onto the wrong key.
    data.update({key: value for key, value in zip(keys, values) if value is not None})
import face_recognition
import cv2
from datetime import datetime, timedelta
import numpy as np
import platform
import pickle
# Our list of known face encodings and a matching list of metadata about each face.
# The two lists are parallel: the metadata for the encoding at index i lives at
# index i of known_face_metadata (lookup_known_face relies on this).
known_face_encodings = [] # each encoding is a list
known_face_metadata = []
def save_known_faces():
    """Pickle the known face encodings and metadata to ``known_faces.dat``."""
    with open("known_faces.dat", "wb") as face_data_file:
        face_data = [known_face_encodings, known_face_metadata]
        pickle.dump(face_data, face_data_file)
    print("Known faces backed up to disk.")
def load_known_faces():
    """Restore the known face lists from ``known_faces.dat`` if it exists.

    Rebinds the module-level ``known_face_encodings`` and
    ``known_face_metadata``. When the file is missing the globals are left
    untouched and a notice is printed instead.
    """
    global known_face_encodings, known_face_metadata

    try:
        with open("known_faces.dat", "rb") as face_data_file:
            # NOTE: pickle can execute arbitrary code while loading; only ever
            # load a known_faces.dat that this program wrote itself.
            known_face_encodings, known_face_metadata = pickle.load(face_data_file)
            print("Known faces loaded from disk.")
    except FileNotFoundError:
        print("No previous face data found - starting with a blank known face list.")
def running_on_jetson_nano():
    """Return True when the machine architecture is ``aarch64``.

    To make the same code work on a laptop or on a Jetson Nano, we detect when
    we are running on the Nano so that we can access the camera correctly in
    that case. On a normal Intel laptop, platform.machine() will be "x86_64"
    instead of "aarch64".
    """
    return platform.machine() == "aarch64"
def get_jetson_gstreamer_source(capture_width=1280, capture_height=720, display_width=1280, display_height=720, framerate=60, flip_method=0):
    """
    Return an OpenCV-compatible video source description that uses gstreamer to capture video from the camera on a Jetson Nano

    Args:
        capture_width: Width requested from the camera, in pixels.
        capture_height: Height requested from the camera, in pixels.
        display_width: Width of the frames handed to the application, in pixels.
        display_height: Height of the frames handed to the application, in pixels.
        framerate: Capture rate in frames per second.
        flip_method: Value passed to ``nvvidconv flip-method``.

    Returns:
        The gstreamer pipeline description as a single string.
    """
    return (
        f'nvarguscamerasrc ! video/x-raw(memory:NVMM), '
        f'width=(int){capture_width}, height=(int){capture_height}, '
        f'format=(string)NV12, framerate=(fraction){framerate}/1 ! '
        f'nvvidconv flip-method={flip_method} ! '
        f'video/x-raw, width=(int){display_width}, height=(int){display_height}, format=(string)BGRx ! '
        'videoconvert ! video/x-raw, format=(string)BGR ! appsink'
    )
def register_new_face(face_encoding, face_image):
    """
    Add a new person to our list of known faces

    Args:
        face_encoding: Encoding of the new face; appended to ``known_face_encodings``.
        face_image: Thumbnail of the face, stored in the metadata so it can
            later be drawn on the video frame.
    """
    # Add the face encoding to the list of known faces
    known_face_encodings.append(face_encoding)
    # Add a matching dictionary entry to our metadata list.
    # We can use this to keep track of how many times a person has visited, when we last saw them, etc.
    now = datetime.now()
    known_face_metadata.append({
        "first_seen": now,
        "first_seen_this_interaction": now,
        "last_seen": now,
        "seen_count": 1,
        "seen_frames": 1,
        "face_image": face_image,
    })
def lookup_known_face(face_encoding):
    """
    See if this is a face we already have in our face list

    Args:
        face_encoding: Encoding of the face to look up.

    Returns:
        The metadata dict of the closest known face when its distance is below
        0.65, otherwise None. On a match the dict's "last_seen", "seen_frames"
        and possibly "first_seen_this_interaction" / "seen_count" entries are
        updated in place.
    """
    metadata = None
    # If our known face list is empty, just return nothing since we can't possibly have seen this face.
    if len(known_face_encodings) == 0:
        return metadata
    # Calculate the face distance between the unknown face and every face in our known face list
    # This will return a floating point number between 0.0 and 1.0 for each known face. The smaller the number,
    # the more similar that face was to the unknown face.
    face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
    # Get the known face that had the lowest distance (i.e. most similar) from the unknown face.
    best_match_index = np.argmin(face_distances)
    # If the face with the lowest distance had a distance under 0.6, we consider it a face match.
    # 0.6 comes from how the face recognition model was trained. It was trained to make sure pictures
    # of the same person always were less than 0.6 away from each other.
    # Here, we are loosening the threshold a little bit to 0.65 because it is unlikely that two very similar
    # people will come up to the door at the same time.
    if face_distances[best_match_index] < 0.65:
        # If we have a match, look up the metadata we've saved for it (like the first time we saw it, etc)
        metadata = known_face_metadata[best_match_index]
        # Update the metadata for the face so we can keep track of how recently we have seen this face.
        metadata["last_seen"] = datetime.now()
        metadata["seen_frames"] += 1
        # We'll also keep a total "seen count" that tracks how many times this person has come to the door.
        # But we can say that if we have seen this person within the last 5 minutes, it is still the same
        # visit, not a new visit. But if they go away for awhile and come back, that is a new visit.
        # NOTE(review): the check measures time since the START of the current interaction, so someone
        # who stays in view for more than 5 minutes is also counted as a new visit - confirm intent.
        if datetime.now() - metadata["first_seen_this_interaction"] > timedelta(minutes=5):
            metadata["first_seen_this_interaction"] = datetime.now()
            metadata["seen_count"] += 1
    return metadata
def main_loop():
    """Run the doorbell camera: detect, track, label and display visitors.

    Reads frames from the camera until 'q' is pressed or the camera stops
    delivering frames. Unknown faces are registered, known ones are labelled
    with how long they have been at the door, recently seen visitors are drawn
    as thumbnails along the top of the frame, and the known-face data is
    periodically saved to disk.
    """
    # Get access to the webcam. The method is different depending on if this is running on a laptop or a Jetson Nano.
    if running_on_jetson_nano():
        # Accessing the camera with OpenCV on a Jetson Nano requires gstreamer with a custom gstreamer source string
        video_capture = cv2.VideoCapture(get_jetson_gstreamer_source(), cv2.CAP_GSTREAMER)
    else:
        # Accessing the camera with OpenCV on a laptop just requires passing in the number of the webcam (usually 0)
        # Note: You can pass in a filename instead if you want to process a video file instead of a live camera stream
        video_capture = cv2.VideoCapture(0)

    # Track how long since we last saved a copy of our known faces to disk as a backup.
    number_of_faces_since_save = 0

    while True:
        # Grab a single frame of video
        ret, frame = video_capture.read()
        if not ret:
            # No frame was delivered (camera unplugged / end of file); resizing None would crash
            break

        # Resize frame of video to 1/4 size for faster face recognition processing
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses).
        # cvtColor returns a contiguous array, unlike a reversed-channel view of the frame.
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

        # Find all the face locations and face encodings in the current frame of video
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

        # Loop through each detected face and see if it is one we have seen before
        # If so, we'll give it a label that we'll draw on top of the video.
        face_labels = []
        for face_location, face_encoding in zip(face_locations, face_encodings):
            # See if this face is in our list of known faces.
            metadata = lookup_known_face(face_encoding)

            # If we found the face, label the face with some useful information.
            if metadata is not None:
                time_at_door = datetime.now() - metadata['first_seen_this_interaction']
                face_label = f"At door {int(time_at_door.total_seconds())}s"
            # If this is a brand new face, add it to our list of known faces
            else:
                face_label = "New visitor!"

                # Grab the image of the face from the current frame of video
                top, right, bottom, left = face_location
                face_image = small_frame[top:bottom, left:right]
                face_image = cv2.resize(face_image, (150, 150))

                # Add the new face to our known face data
                register_new_face(face_encoding, face_image)

            face_labels.append(face_label)

        # Draw a box around each face and label each face
        for (top, right, bottom, left), face_label in zip(face_locations, face_labels):
            # Scale back up face locations since the frame we detected in was scaled to 1/4 size
            top *= 4
            right *= 4
            bottom *= 4
            left *= 4

            # Draw a box around the face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

            # Draw a label with a name below the face
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
            cv2.putText(frame, face_label, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)

        # Display recent visitor images
        number_of_recent_visitors = 0
        for metadata in known_face_metadata:
            # If we have seen this person in the last 10 seconds (and in more than 5 frames), draw their image
            if datetime.now() - metadata["last_seen"] < timedelta(seconds=10) and metadata["seen_frames"] > 5:
                # Draw the known face image
                x_position = number_of_recent_visitors * 150
                if x_position + 150 > frame.shape[1]:
                    # No room left in the thumbnail strip; assigning past the frame edge would raise
                    break
                frame[30:180, x_position:x_position + 150] = metadata["face_image"]
                number_of_recent_visitors += 1

                # Label the image with how many times they have visited
                visits = metadata['seen_count']
                visit_label = f"{visits} visits"
                if visits == 1:
                    visit_label = "First visit"
                cv2.putText(frame, visit_label, (x_position + 10, 170), cv2.FONT_HERSHEY_DUPLEX, 0.6, (255, 255, 255), 1)

        if number_of_recent_visitors > 0:
            cv2.putText(frame, "Visitors at Door", (5, 18), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)

        # Display the final frame of video with boxes drawn around each detected face
        cv2.imshow('Video', frame)

        # Hit 'q' on the keyboard to quit!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            save_known_faces()
            break

        # We need to save our known faces back to disk every so often in case something crashes.
        if len(face_locations) > 0 and number_of_faces_since_save > 100:
            save_known_faces()
            number_of_faces_since_save = 0
        else:
            number_of_faces_since_save += 1

    # Release handle to the webcam
    video_capture.release()
    cv2.destroyAllWindows()
if __name__ == "__main__":
    # Restore any previously saved visitors, then run the camera loop
    load_known_faces()
    main_loop()
import glob
from PIL import Image
import face_recognition
import time, os, fnmatch, shutil
import numpy as np
# get a list of whole blob
# keep a list array or pandas loaded from hdf5
known_images = []  # file name of each known image, parallel to known_faces
known_faces = []   # first face encoding found in each known image
for file_name in glob.glob("./*.png"):
    image = face_recognition.load_image_file(file_name)
    known_face_encoding = face_recognition.face_encodings(image)
    # Skip images in which no face could be encoded
    if known_face_encoding:
        known_images.append(file_name)
        known_faces.append(known_face_encoding[0])

image_to_test = face_recognition.load_image_file("extracted_face_2019_Jun_28_2153_1.png")
image_to_test_encoding = face_recognition.face_encodings(image_to_test)
if image_to_test_encoding:
    # Compare against a single encoding (the first face in the test image);
    # passing the whole list only broadcasts when it holds exactly one face.
    face_distances = face_recognition.face_distance(known_faces, image_to_test_encoding[0])
    for i, face_distance in enumerate(face_distances):
        print("The test image has a distance of {:.2} from known image #{}".format(face_distance, i))
        print("- With a normal cutoff of 0.6, would the test image match the known image? {}".format(face_distance < 0.6))
        print("- With a very strict cutoff of 0.5, would the test image match the known image? {}".format(face_distance < 0.5))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.