rachmadaniHaryono/hydrus_face_recognition_knn.py

## hydrus_face_recognition_knn.py
"""hydrus face recognition knn

modified from https://github.com/ageitgey/face_recognition/blob/master/examples/face_recognition_knn.py
"""
import re
import io

from hydrus import Client
import face_recognition
from tqdm import tqdm

from face_recognition_knn import show_prediction_labels_on_image
# Pattern for one line of the tag list stored in `face_tag_file`, e.g.
# "person:jane doe (12)" or "idol:jane doe (1.234-56)".
#   tag        - "<idol|person>:<name> " (the trailing space is part of the group)
#   name       - the text after the namespace
#   full_count - optional number in front of the dash
#   count      - the number right before the closing parenthesis
# NOTE(review): `tag` is sent to the client search with its trailing space
# still attached; confirm the Hydrus API tolerates that.
face_tag_regex = r'(?P<tag>(idol|person):(?P<name>.+) )\(((?P<full_count>([\d.])+)-)?(?P<count>([\d.])+)\)'
face_tag_file = 'face.txt'

with open(face_tag_file) as f:
    lines = f.read().splitlines()

# Lines that never match the pattern but should not be reported as problems.
invalid_lines = ['person:* (wildcard search)']

# Pair every input line with its match object (None when the line does not match).
matches_zip = [(line, re.match(face_tag_regex, line)) for line in lines]
invalid_matches_zip = [
    (line, match) for line, match in matches_zip
    if not match and line not in invalid_lines]
valid_matches_zip = [
    (line, match) for line, match in matches_zip
    if match and line not in invalid_lines]
if invalid_matches_zip:
    print('unmatching lines:\n{}'.format('\n'.join([x for x, _ in invalid_matches_zip])))

# NOTE(review): this API key is committed in the source; consider loading it
# from the environment or a config file instead.
access_key = '918efdc1d28ae710b46fc814ee818100a102786140ede877db94cedf3d733cc1'
cl = Client(access_key)


def _count_of(match):
    """Return the ``count`` group of *match* as an int, with '.' removed.

    A count made only of dots yields 0 instead of raising ``ValueError``.
    """
    digits = match.group('count').replace('.', '')
    return int(digits) if digits else 0


# Keep only the three tags with the highest count.
filtered_vmz = sorted(valid_matches_zip, key=lambda item: _count_of(item[1]), reverse=True)[:3]

X = []  # face encodings used as training samples
verbose = True
y = []  # name label for the encoding at the same index in X
for _, vmz in tqdm(filtered_vmz):
    tag = vmz.groupdict()['tag']
    name = vmz.groupdict()['name']
    # Loop through each training image for the current person
    name_succes_count = 0
    for f_id in tqdm(cl.search_files([tag])):
        # Stop once more than 10 usable images were collected for this name.
        if name_succes_count > 10:
            break
        image = face_recognition.load_image_file(io.BytesIO(cl.get_file(file_id=f_id)))
        face_bounding_boxes = face_recognition.face_locations(image)
        if len(face_bounding_boxes) != 1:
            # If there are no people (or too many people) in a training image, skip the image.
            if verbose:
                # tqdm.write must actually be called; a bare `tqdm.write`
                # followed by a parenthesised string prints nothing.
                tqdm.write("id {} not suitable for training: {}".format(
                    f_id,
                    "Didn't find a face" if len(face_bounding_boxes) < 1 else "Found more than one face"))
            continue
        # Add face encoding for current image to the training set
        X.append(face_recognition.face_encodings(image, known_face_locations=face_bounding_boxes)[0])
        y.append(name)
        name_succes_count += 1

def predict(hash_v):
    """Label the faces found in one Hydrus file and display the result.

    The file is fetched through the module-level client ``cl``, every detected
    face is classified with ``knn_clf``, and faces whose nearest neighbour is
    farther away than ``distance_threshold`` are labelled ``"unknown"``.  The
    labelled image is handed to ``show_prediction_labels_on_image``.

    ``knn_clf`` and ``distance_threshold`` are read as globals; they are not
    defined in the visible part of this file and must exist before calling.

    Args:
        hash_v: value passed as the first positional argument of
            ``cl.get_file`` (presumably the file hash -- confirm against the
            Hydrus client API).

    Returns:
        None.
    """
    # Download once and reuse the bytes for both decoding and display.
    file_bytes = cl.get_file(hash_v)
    X_img = face_recognition.load_image_file(io.BytesIO(file_bytes))
    X_face_locations = face_recognition.face_locations(X_img)

    predictions = []
    # Only query the classifier when at least one face was found; knn_clf is
    # presumably a scikit-learn KNeighborsClassifier (as in the upstream
    # example), which does not accept an empty query.
    if X_face_locations:
        faces_encodings = face_recognition.face_encodings(X_img, known_face_locations=X_face_locations)
        # Use the KNN model to find the best matches for the test face
        closest_distances = knn_clf.kneighbors(faces_encodings, n_neighbors=1)
        are_matches = [nearest[0] <= distance_threshold for nearest in closest_distances[0]]
        # Predict classes and remove classifications that aren't within the threshold
        predictions = [
            (pred, loc) if rec else ("unknown", loc)
            for pred, loc, rec in zip(knn_clf.predict(faces_encodings), X_face_locations, are_matches)]

    # With no faces the image is still shown, just without any labels.
    show_prediction_labels_on_image(io.BytesIO(file_bytes), predictions)
	"""hydrus face recognition knn

	modified from https://github.com/ageitgey/face_recognition/blob/master/examples/face_recognition_knn.py
	"""
	import re
	import io

	from hydrus import Client
	import face_recognition
	from tqdm import tqdm

	from face_recognition_knn import show_prediction_labels_on_image
	# Pattern for one line of the tag list stored in `face_tag_file`, e.g.
	# "person:jane doe (12)" or "idol:jane doe (1.234-56)".
	#   tag        - "<idol|person>:<name> " (the trailing space is part of the group)
	#   name       - the text after the namespace
	#   full_count - optional number in front of the dash
	#   count      - the number right before the closing parenthesis
	# The alternation must be a plain `|`; an escaped `\|` would only match a
	# literal pipe character and no tag line would ever match.
	face_tag_regex = r'(?P<tag>(idol|person):(?P<name>.+) )\(((?P<full_count>([\d.])+)-)?(?P<count>([\d.])+)\)'
	face_tag_file = 'face.txt'

	with open(face_tag_file) as f:
		lines = f.read().splitlines()

	# Lines that never match the pattern but should not be reported as problems.
	invalid_lines = ['person:* (wildcard search)']

	# Pair every input line with its match object (None when the line does not match).
	matches_zip = [(line, re.match(face_tag_regex, line)) for line in lines]
	invalid_matches_zip = [
		(line, match) for line, match in matches_zip
		if not match and line not in invalid_lines]
	valid_matches_zip = [
		(line, match) for line, match in matches_zip
		if match and line not in invalid_lines]
	if invalid_matches_zip:
		print('unmatching lines:\n{}'.format('\n'.join([x for x, _ in invalid_matches_zip])))

	# NOTE(review): this API key is committed in the source; consider loading it
	# from the environment or a config file instead.
	access_key = '918efdc1d28ae710b46fc814ee818100a102786140ede877db94cedf3d733cc1'
	cl = Client(access_key)


	def _count_of(match):
		"""Return the ``count`` group of *match* as an int, with '.' removed.

		A count made only of dots yields 0 instead of raising ``ValueError``.
		"""
		digits = match.group('count').replace('.', '')
		return int(digits) if digits else 0


	# Keep only the three tags with the highest count.
	filtered_vmz = sorted(valid_matches_zip, key=lambda item: _count_of(item[1]), reverse=True)[:3]

	X = []  # face encodings used as training samples
	verbose = True
	y = []  # name label for the encoding at the same index in X
	for _, vmz in tqdm(filtered_vmz):
		tag = vmz.groupdict()['tag']
		name = vmz.groupdict()['name']
		# Loop through each training image for the current person
		name_succes_count = 0
		for f_id in tqdm(cl.search_files([tag])):
			# Stop once more than 10 usable images were collected for this name.
			if name_succes_count > 10:
				break
			image = face_recognition.load_image_file(io.BytesIO(cl.get_file(file_id=f_id)))
			face_bounding_boxes = face_recognition.face_locations(image)
			if len(face_bounding_boxes) != 1:
				# If there are no people (or too many people) in a training image, skip the image.
				if verbose:
					# tqdm.write must actually be called; a bare `tqdm.write`
					# followed by a parenthesised string prints nothing.
					tqdm.write("id {} not suitable for training: {}".format(
						f_id,
						"Didn't find a face" if len(face_bounding_boxes) < 1 else "Found more than one face"))
				continue
			# Add face encoding for current image to the training set
			X.append(face_recognition.face_encodings(image, known_face_locations=face_bounding_boxes)[0])
			y.append(name)
			name_succes_count += 1

	def predict(hash_v):
		"""Label the faces found in one Hydrus file and display the result.

		The file is fetched through the module-level client ``cl``, every
		detected face is classified with ``knn_clf``, and faces whose nearest
		neighbour is farther away than ``distance_threshold`` are labelled
		``"unknown"``.  The labelled image is handed to
		``show_prediction_labels_on_image``.

		``knn_clf`` and ``distance_threshold`` are read as globals; they are
		not defined in the visible part of this file and must exist before
		calling.

		Args:
			hash_v: value passed as the first positional argument of
				``cl.get_file`` (presumably the file hash -- confirm against
				the Hydrus client API).

		Returns:
			None.
		"""
		# Download once and reuse the bytes for both decoding and display.
		file_bytes = cl.get_file(hash_v)
		X_img = face_recognition.load_image_file(io.BytesIO(file_bytes))
		X_face_locations = face_recognition.face_locations(X_img)

		predictions = []
		# Only query the classifier when at least one face was found; knn_clf is
		# presumably a scikit-learn KNeighborsClassifier (as in the upstream
		# example), which does not accept an empty query.
		if X_face_locations:
			faces_encodings = face_recognition.face_encodings(X_img, known_face_locations=X_face_locations)
			# Use the KNN model to find the best matches for the test face
			closest_distances = knn_clf.kneighbors(faces_encodings, n_neighbors=1)
			are_matches = [nearest[0] <= distance_threshold for nearest in closest_distances[0]]
			# Predict classes and remove classifications that aren't within the threshold
			predictions = [
				(pred, loc) if rec else ("unknown", loc)
				for pred, loc, rec in zip(knn_clf.predict(faces_encodings), X_face_locations, are_matches)]

		# With no faces the image is still shown, just without any labels.
		show_prediction_labels_on_image(io.BytesIO(file_bytes), predictions)