Skip to content

Instantly share code, notes, and snippets.

@rachmadaniHaryono
Created August 5, 2019 07:59
Show Gist options
  • Save rachmadaniHaryono/bac5b0902aba6278611188dfaf57640a to your computer and use it in GitHub Desktop.
Save rachmadaniHaryono/bac5b0902aba6278611188dfaf57640a to your computer and use it in GitHub Desktop.
hydrus face recognition knn
"""hydrus face recognition knn
modified from https://github.com/ageitgey/face_recognition/blob/master/examples/face_recognition_knn.py
"""
import re
import io
from hydrus import Client
import face_recognition
from tqdm import tqdm
from face_recognition_knn import show_prediction_labels_on_image
# Pattern for one tag line: "<idol|person>:<name> (<count>)" or
# "<idol|person>:<name> (<full_count>-<count>)". The counts may contain dots.
# NOTE(review): the `tag` group also captures the space that precedes the
# opening parenthesis -- confirm that consumers of `tag` tolerate it.
face_tag_regex = r'(?P<tag>(idol|person):(?P<name>.+) )\(((?P<full_count>([\d.])+)-)?(?P<count>([\d.])+)\)'
face_tag_file = 'face.txt'
with open(face_tag_file) as f:
    lines = f.read().splitlines()
# Lines listed here are expected not to be usable tags and are dropped
# without being reported.
invalid_lines = ['person:* (wildcard search)']
# Pair every line with its match object (None when the line does not match).
matches_zip = [(x, re.match(face_tag_regex, x)) for x in lines]
# Reuse the cached match instead of running the regex a second time.
invalid_matches_zip = [(x, match) for x, match in matches_zip if not match and x not in invalid_lines]
valid_matches_zip = [(x, match) for x, match in matches_zip if match and x not in invalid_lines]
if invalid_matches_zip:
    print('unmatching lines:\n{}'.format('\n'.join([x for x, _ in invalid_matches_zip])))
# SECURITY NOTE(review): the client API access key is hardcoded in source.
# Consider rotating it and loading it from configuration or the environment.
access_key = '918efdc1d28ae710b46fc814ee818100a102786140ede877db94cedf3d733cc1'
cl = Client(access_key)


def _tag_file_count(entry):
    """Return the count captured for a ``(line, match)`` pair as an int."""
    # Dots are removed before conversion (presumably thousands separators --
    # TODO confirm against the format of the tag file).
    digits = entry[1].group('count').replace('.', '')
    # The pattern also accepts a count consisting only of dots; treat that as
    # zero instead of letting int('') raise ValueError.
    return int(digits) if digits else 0


# Keep only the three entries with the highest counts.
filtered_vmz = sorted(valid_matches_zip, key=_tag_file_count, reverse=True)[:3]
# Training set: X collects one face encoding per usable image and y holds the
# person name for the encoding at the same index.
X = []
verbose = True
y = []
for _, vmz in tqdm(filtered_vmz):
    tag = vmz.group('tag')
    name = vmz.group('name')
    # Loop through each training image for the current person
    name_succes_count = 0
    for f_id in tqdm(cl.search_files([tag])):
        # Stop fetching files once more than 10 usable images were collected
        # for this name.
        if name_succes_count > 10:
            break
        image = face_recognition.load_image_file(io.BytesIO(cl.get_file(file_id=f_id)))
        face_bounding_boxes = face_recognition.face_locations(image)
        if len(face_bounding_boxes) != 1:
            # If there are no people (or too many people) in a training image, skip the image.
            if verbose:
                # tqdm.write keeps the message from corrupting the progress
                # bars; it has to be an actual call for anything to be shown.
                tqdm.write("id {} not suitable for training: {}".format(
                    f_id,
                    "Didn't find a face" if len(face_bounding_boxes) < 1 else "Found more than one face"))
            continue
        # Add face encoding for current image to the training set
        X.append(face_recognition.face_encodings(image, known_face_locations=face_bounding_boxes)[0])
        y.append(name)
        name_succes_count += 1
def predict(hash_v):
    """Label the faces found in one file and display the annotated image.

    The file is fetched through the module-level client ``cl``, faces are
    located and encoded, and each face is classified with the module-level
    ``knn_clf``. A face whose nearest-neighbour distance is greater than the
    module-level ``distance_threshold`` is labelled ``"unknown"``. The result
    is handed to ``show_prediction_labels_on_image``; nothing is returned.

    NOTE(review): ``knn_clf`` and ``distance_threshold`` are not defined in
    the visible part of this file -- they must exist before this is called.

    Args:
        hash_v: Identifier passed as the first positional argument to
            ``cl.get_file`` (presumably the file hash -- confirm).
    """
    # Fetch the file once and reuse the bytes for decoding and for display.
    file_content = cl.get_file(hash_v)
    X_img = face_recognition.load_image_file(io.BytesIO(file_content))
    X_face_locations = face_recognition.face_locations(X_img)
    predictions = []
    # Only query the classifier when at least one face was found; an empty
    # sample list is presumably rejected by the classifier (TODO confirm).
    if X_face_locations:
        faces_encodings = face_recognition.face_encodings(X_img, known_face_locations=X_face_locations)
        # Use the KNN model to find the best matches for the test face
        closest_distances = knn_clf.kneighbors(faces_encodings, n_neighbors=1)
        are_matches = [distances[0] <= distance_threshold for distances in closest_distances[0]]
        # Predict classes and remove classifications that aren't within the threshold
        predictions = [
            (pred, loc) if rec else ("unknown", loc)
            for pred, loc, rec in zip(knn_clf.predict(faces_encodings), X_face_locations, are_matches)
        ]
    show_prediction_labels_on_image(io.BytesIO(file_content), predictions)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment