@tatmush
Last active May 3, 2020 20:38
1. Collect images from the internet, which can be done using code.
2. Resize the images: `find . -maxdepth 1 -iname "*jpeg" | xargs -L1 -I{} convert -resize 600x800\> "{}" _resized/"{}"`
3. Detect faces using the scripts below.
4. Crop each detected face (see the sketch below).
5. Pick only Noku's face/choose Noku.
6. Make Noku encodings.
7. Detect Noku's face.
8. Find the `Hie` subt…
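Step 4 is not covered by any of the scripts in the gist, so here is a minimal sketch of how the cropping could be done with face_recognition; the `croppedFaces/` folder and the frame file name are placeholders, not code from the original.

# crop_faces.py -- hypothetical helper for step 4, not part of the original gist
import face_recognition
import cv2
import os

image = cv2.imread("keyFrameChanges/some_frame.jpg")  # placeholder path
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# face_locations returns one (top, right, bottom, left) box per detected face
boxes = face_recognition.face_locations(rgb, model="hog")

os.makedirs("croppedFaces", exist_ok=True)
for (i, (top, right, bottom, left)) in enumerate(boxes):
    # slice the face region out of the original BGR image and save it
    face = image[top:bottom, left:right]
    cv2.imwrite(os.path.join("croppedFaces", "face_{}.jpg".format(i)), face)

The crops saved this way can be sorted by hand for step 5 (keep only Noku's face) before building the encodings. The first full script in the gist, below, runs the recognition on a video file.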
# import the necessary packages
import face_recognition
import argparse
import imutils
import pickle
import time
import cv2
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-e", "--encodings", required=True, help="path to serialized db of facial encodings")
ap.add_argument("-i", "--input", required=True, help="path to input video")
ap.add_argument("-o", "--output", type=str, help="path to output video")
ap.add_argument("-y", "--display", type=int, default=1, help="whether or not to display output frame to screen")
ap.add_argument("-t", "--tolerance", type=float, default=0.6, help="how much distance between faces to consider it a match; lower is more strict (0.6 is a typical value)")
ap.add_argument("-d", "--detection-method", type=str, default="cnn", help="face detection model to use: either `hog` or `cnn`")
args = vars(ap.parse_args())
# load the known faces and embeddings
print("[INFO] loading encodings...")
data = pickle.loads(open(args["encodings"], "rb").read())
# initialize the pointer to the video file and the video writer
print("[INFO] processing video...")
stream = cv2.VideoCapture(args["input"])
writer = None
# loop over frames from the video file stream
while True:
    # grab the next frame
    (grabbed, frame) = stream.read()

    # if the frame was not grabbed, then we have reached the
    # end of the stream
    if not grabbed:
        break

    # convert the input frame from BGR to RGB, then resize it to have
    # a width of 750px (to speed up processing)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    rgb = imutils.resize(rgb, width=750)
    r = frame.shape[1] / float(rgb.shape[1])

    # detect the (x, y)-coordinates of the bounding boxes
    # corresponding to each face in the input frame, then compute
    # the facial embeddings for each face
    boxes = face_recognition.face_locations(rgb, model=args["detection_method"])
    encodings = face_recognition.face_encodings(rgb, boxes)
    names = []

    # loop over the facial embeddings
    for encoding in encodings:
        # attempt to match each face in the input image to our known
        # encodings
        matches = face_recognition.compare_faces(data["encodings"], encoding, tolerance=args["tolerance"])
        name = "Unknown"

        # check to see if we have found a match
        if True in matches:
            # find the indexes of all matched faces, then initialize a
            # dictionary to count the total number of times each face
            # was matched
            matchedIdxs = [i for (i, b) in enumerate(matches) if b]
            counts = {}

            # loop over the matched indexes and maintain a count for
            # each recognized face
            for i in matchedIdxs:
                name = data["names"][i]
                counts[name] = counts.get(name, 0) + 1

            # determine the recognized face with the largest number
            # of votes (note: in the event of an unlikely tie Python
            # will select the first entry in the dictionary)
            name = max(counts, key=counts.get)

        # update the list of names
        names.append(name)

    # loop over the recognized faces
    for ((top, right, bottom, left), name) in zip(boxes, names):
        # rescale the face coordinates
        top = int(top * r)
        right = int(right * r)
        bottom = int(bottom * r)
        left = int(left * r)

        # draw the predicted face name on the image
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        y = top - 15 if top - 15 > 15 else top + 15
        cv2.putText(frame, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)

    # if the video writer is None *AND* we are supposed to write
    # the output video to disk, initialize the writer
    if writer is None and args["output"] is not None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc, 24, (frame.shape[1], frame.shape[0]), True)

    # if the writer is not None, write the frame with the recognized
    # faces to disk
    if writer is not None:
        writer.write(frame)

    # check to see if we are supposed to display the output frame to
    # the screen
    if args["display"] > 0:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break
# close the video file pointers
stream.release()
# check to see if the video writer pointer needs to be released
if writer is not None:
writer.release()
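Assuming the video-recognition script above is saved as `recognize_faces_video.py` (the gist does not give file names), a typical invocation would look something like `python recognize_faces_video.py --encodings encodings1.pickle --input input_video.mp4 --output output.avi --display 1 --detection-method hog --tolerance 0.5`. The `hog` detector is much faster than `cnn` on a CPU, at some cost in accuracy. The next snippet applies the same matching logic to still key frames instead of a video stream.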
# import the necessary packages
import face_recognition
import argparse
import pickle
import cv2
import os
import shutil
def detectFaces1(tolerance):
    # load the known faces and embeddings
    print("using a tolerance of: " + str(tolerance))
    print('[INFO] loading encodings...')
    data = pickle.loads(open("encodings1.pickle", "rb").read())

    # delete files left over from previous runs
    folder = 'detectedFaces/'
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))

    flist = os.listdir("keyFrameChanges/")
    i = 0

    # loop over the extracted key frames
    for image in flist:
        # load the input image and convert it from BGR to RGB
        image = cv2.imread("keyFrameChanges/" + image)
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # detect the (x, y)-coordinates of the bounding boxes corresponding to
        # each face in the input image, then compute the facial embeddings
        print('[INFO] recognizing faces...')
        boxes = face_recognition.face_locations(rgb, model="hog")
        encodings = face_recognition.face_encodings(rgb, boxes)

        # initialize the list of names for each face detected
        names = []

        # loop over the facial embeddings
        for encoding in encodings:
            # attempt to match each face in the input image to our known
            # encodings; tolerance=0.4 turned out to be the optimum for me
            matches = face_recognition.compare_faces(data['encodings'], encoding, tolerance=tolerance)
            name = 'Unknown'

            # check to see if we have found a match
            if True in matches:
                # find the indexes of all matched faces, then count the total
                # number of times each face was matched
                matchedIdxs = [j for (j, b) in enumerate(matches) if b]
                counts = {}
                for j in matchedIdxs:
                    name = data['names'][j]
                    counts[name] = counts.get(name, 0) + 1

                # keep the recognized face with the largest number of votes
                name = max(counts, key=counts.get)

            # update the list of names
            names.append(name)

        # loop over the recognized faces
        for ((top, right, bottom, left), name) in zip(boxes, names):
            # draw the predicted face name on the image
            cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
            y = top - 15 if top - 15 > 15 else top + 15
            cv2.putText(image, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)

            # save the annotated frame once per recognized face
            i += 1
            cv2.imwrite('detectedFaces/' + name + str(i) + '.jpg', image)

    print("Done detecting faces")
    return True


detectFaces1(0.5)
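detectFaces1 reads still images out of a keyFrameChanges/ folder, but nothing in the gist shows how that folder is filled. A minimal sketch of one way to do it, assuming a key frame is simply a frame that differs noticeably from the last saved one (the video path and the difference threshold are guesses, not values from the gist):

# extract_key_frames.py -- hypothetical helper, not part of the original gist
import cv2
import os

os.makedirs("keyFrameChanges", exist_ok=True)
stream = cv2.VideoCapture("input_video.mp4")  # placeholder path
prev_gray = None
frame_no = 0

while True:
    (grabbed, frame) = stream.read()
    if not grabbed:
        break
    frame_no += 1

    # keep the frame if it differs enough from the last saved key frame
    # (the mean-absolute-difference threshold of 20 is a guess; tune it)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if prev_gray is None or cv2.absdiff(gray, prev_gray).mean() > 20:
        cv2.imwrite("keyFrameChanges/%d_frame.jpg" % frame_no, frame)
        prev_gray = gray

stream.release()

The next script is the one that builds the serialized encodings file (the `encodings1.pickle` loaded by detectFaces1) from a folder of face images.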
# import the necessary packages
from imutils import paths
import face_recognition
import argparse
import pickle
import cv2
import os
# parse arguments
ap = argparse.ArgumentParser()
ap.add_argument('-i', '--dataset', required=True, help='path to input directory of faces + images')
ap.add_argument('-e', '--encodings', required=True, help='path to serialized db of facial encodings')
ap.add_argument('-d', '--detection-method', type=str, default='cnn', help='face detection model to use: either `hog` or `cnn`')
args = vars(ap.parse_args())
# grab the paths to the input images in our dataset
print('[INFO] quantifying faces...')
imagePaths = list(paths.list_images(args['dataset']))
# initialize the list of known encodings and known names
knownEncodings = []
knownNames = []
# loop over the image paths
for (i, imagePath) in enumerate(imagePaths):
    # extract the person name from the image path
    print('[INFO] processing image {}/{}'.format(i + 1, len(imagePaths)))
    name = imagePath.split(os.path.sep)[-2]

    # load the input image and convert it from BGR (OpenCV ordering)
    # to dlib ordering (RGB)
    image = cv2.imread(imagePath)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # detect the (x, y)-coordinates of the bounding boxes corresponding
    # to each face in the input image
    boxes = face_recognition.face_locations(rgb, model=args['detection_method'])

    # compute the facial embedding for each face
    encodings = face_recognition.face_encodings(rgb, boxes)

    # loop over the encodings
    for encoding in encodings:
        # add each encoding + name to our set of known names and encodings
        knownEncodings.append(encoding)
        knownNames.append(name)
# dump the facial encodings + names to disk
print('[INFO] serializing encodings...')
data = {'encodings': knownEncodings, 'names': knownNames}
f = open(args['encodings'], 'wb')
f.write(pickle.dumps(data))
f.close()
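Assuming the encoding script above is saved as `encode_faces.py` and the hand-picked face crops live in `dataset/noku/` (both names are placeholders; the script only requires one sub-folder per person, since each person's name is taken from the parent directory of the image), it could be run as `python encode_faces.py --dataset dataset --encodings encodings1.pickle --detection-method cnn`. The final snippet is a Jupyter cell experimenting with isolating the subtitle region of a frame so that pytesseract can read it.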
%matplotlib inline
import cv2
import pytesseract
from matplotlib import pyplot as plt
img = cv2.imread('1250_frame.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#gray = cv2.threshold(img, 125, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C)
gray = cv2.threshold(gray[200:300, 50:500], 200, 255, cv2.THRESH_BINARY)[1]
#gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
#gray = cv2.medianBlur(img, 1)
# gray = cv2.GaussianBlur(gray, (21, 21), 0)
#gray = cv2.Canny(gray, 15, 15)
# I don't know what I am doing here!
plt.imshow(gray, cmap='gray')
# I used pytesseract to read the text; you can install it with pip
plt.show()
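The cell above imports pytesseract but never actually calls it. A minimal sketch of the missing OCR step, assuming the thresholded crop in `gray` already isolates the subtitle text:

# run Tesseract OCR on the thresholded subtitle crop
# (requires the tesseract binary to be installed on the system)
text = pytesseract.image_to_string(gray)
print(text)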