1. Collect images from the internet (this can be scripted).
2. Resize the images: `find . -maxdepth 1 -iname "*jpeg" | xargs -L1 -I{} convert -resize 600x800\> "{}" _resized/"{}"`
3. Detect faces using the scripts below.
4. Crop each detected face (see the sketch after this list).
5. Pick only Noku's face.
6. Make encodings for Noku's face.
7. Detect Noku's face.
8. Find the `Hie` subt…
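Step 4 has no script in this gist; here is a minimal sketch of cropping detected faces, assuming the resized images from step 2 sit in `_resized/` and the crops go to a hypothetical `faces/` folder:

import os
import cv2
import face_recognition

os.makedirs("faces", exist_ok=True)  # assumed output folder
for i, fname in enumerate(os.listdir("_resized")):
    image = cv2.imread(os.path.join("_resized", fname))
    if image is None:
        continue  # skip anything that isn't an image
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # face_locations returns (top, right, bottom, left) boxes
    for j, (top, right, bottom, left) in enumerate(face_recognition.face_locations(rgb, model="hog")):
        crop = image[top:bottom, left:right]
        cv2.imwrite(os.path.join("faces", "face_{}_{}.jpg".format(i, j)), crop)

The first script below handles steps 3 and 7, recognizing faces frame by frame in a video file: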
# import the necessary packages
import face_recognition
import argparse
import imutils
import pickle
import time
import cv2

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-e", "--encodings", required=True, help="path to serialized db of facial encodings")
ap.add_argument("-i", "--input", required=True, help="path to input video")
ap.add_argument("-o", "--output", type=str, help="path to output video")
ap.add_argument("-y", "--display", type=int, default=1, help="whether or not to display output frame to screen")
ap.add_argument("-t", "--tolerance", type=float, default=0.6, help="how much distance between faces to consider a match; lower is more strict, 0.6 is the typical best performance")
ap.add_argument("-d", "--detection-method", type=str, default="cnn", help="face detection model to use: either `hog` or `cnn`")
args = vars(ap.parse_args())

# load the known faces and embeddings
print("[INFO] loading encodings...")
data = pickle.loads(open(args["encodings"], "rb").read())

# initialize the pointer to the video file and the video writer
print("[INFO] processing video...")
stream = cv2.VideoCapture(args["input"])
writer = None

# loop over frames from the video file stream
while True:
    # grab the next frame
    (grabbed, frame) = stream.read()

    # if the frame was not grabbed, then we have reached the
    # end of the stream
    if not grabbed:
        break

    # convert the input frame from BGR to RGB, then resize it to have
    # a width of 750px (to speed up processing)
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    rgb = imutils.resize(rgb, width=750)
    r = frame.shape[1] / float(rgb.shape[1])

    # detect the (x, y)-coordinates of the bounding boxes
    # corresponding to each face in the input frame, then compute
    # the facial embeddings for each face
    boxes = face_recognition.face_locations(rgb, model=args["detection_method"])
    encodings = face_recognition.face_encodings(rgb, boxes)
    names = []

    # loop over the facial embeddings
    for encoding in encodings:
        # attempt to match each face in the input image to our known
        # encodings
        matches = face_recognition.compare_faces(data["encodings"], encoding, tolerance=args["tolerance"])
        name = "Unknown"

        # check to see if we have found a match
        if True in matches:
            # find the indexes of all matched faces, then initialize a
            # dictionary to count the total number of times each face
            # was matched
            matchedIdxs = [i for (i, b) in enumerate(matches) if b]
            counts = {}

            # loop over the matched indexes and maintain a count for
            # each recognized face
            for i in matchedIdxs:
                name = data["names"][i]
                counts[name] = counts.get(name, 0) + 1

            # determine the recognized face with the largest number
            # of votes (note: in the event of an unlikely tie, Python
            # will select the first entry in the dictionary)
            name = max(counts, key=counts.get)

        # update the list of names
        names.append(name)

    # loop over the recognized faces
    for ((top, right, bottom, left), name) in zip(boxes, names):
        # rescale the face coordinates
        top = int(top * r)
        right = int(right * r)
        bottom = int(bottom * r)
        left = int(left * r)

        # draw the predicted face name on the image
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
        y = top - 15 if top - 15 > 15 else top + 15
        cv2.putText(frame, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)

    # if the video writer is None *AND* we are supposed to write
    # the output video to disk, initialize the writer
    if writer is None and args["output"] is not None:
        fourcc = cv2.VideoWriter_fourcc(*"MJPG")
        writer = cv2.VideoWriter(args["output"], fourcc, 24, (frame.shape[1], frame.shape[0]), True)

    # if the writer is not None, write the frame with recognized
    # faces to disk
    if writer is not None:
        writer.write(frame)

    # check to see if we are supposed to display the output frame to
    # the screen
    if args["display"] > 0:
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # if the `q` key was pressed, break from the loop
        if key == ord("q"):
            break

# close the video file pointer
stream.release()

# check to see if the video writer pointer needs to be released
if writer is not None:
    writer.release()
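A sample invocation, assuming the script is saved as recognize_faces_video.py (the gist does not name its files) and that encodings.pickle was produced by the encoding script further down:

python recognize_faces_video.py --encodings encodings.pickle --input input.mp4 \
    --output output.avi --display 0 --tolerance 0.5

The next script applies the same matching to still key frames on disk instead of a video stream.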
# import the necessary packages
import face_recognition
import pickle
import cv2
import os
import shutil

def detectFaces1(tolerance):
    # load the known faces and embeddings
    print("using a tolerance of: " + str(tolerance))
    print('[INFO] loading encodings...')
    data = pickle.loads(open("encodings1.pickle", "rb").read())

    # delete the output of any previous run
    folder = 'detectedFaces/'
    os.makedirs(folder, exist_ok=True)  # make sure the folder exists before listing it
    for filename in os.listdir(folder):
        file_path = os.path.join(folder, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except Exception as e:
            print('Failed to delete %s. Reason: %s' % (file_path, e))

    flist = os.listdir("keyFrameChanges/")
    i = 0

    # loop over the extracted key frames
    for fname in flist:
        # load the input image and convert it from BGR to RGB
        image = cv2.imread("keyFrameChanges/" + fname)
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # detect the (x, y)-coordinates of the bounding boxes corresponding to
        # each face in the input image, then compute the facial embeddings
        print('[INFO] recognizing faces...')
        boxes = face_recognition.face_locations(rgb, model="hog")
        encodings = face_recognition.face_encodings(rgb, boxes)

        # initialize the list of names for each face detected
        names = []

        # loop over the facial embeddings
        for encoding in encodings:
            # attempt to match each face in the input image to our known encodings;
            # tolerance=0.4 turned out to be the most optimal for me
            matches = face_recognition.compare_faces(data['encodings'], encoding, tolerance=tolerance)
            name = 'Unknown'

            # check to see if we have found a match
            if True in matches:
                # find the indexes of all matched faces, count the total number
                # of times each face was matched, and keep the name with the
                # most votes (same voting scheme as the video script above)
                matchedIdxs = [j for (j, b) in enumerate(matches) if b]
                counts = {}
                for j in matchedIdxs:
                    name = data['names'][j]
                    counts[name] = counts.get(name, 0) + 1
                name = max(counts, key=counts.get)

            # update the list of names
            names.append(name)

        # loop over the recognized faces
        for ((top, right, bottom, left), name) in zip(boxes, names):
            # draw the predicted face name on the image and save it
            cv2.rectangle(image, (left, top), (right, bottom), (0, 255, 0), 2)
            y = top - 15 if top - 15 > 15 else top + 15
            cv2.putText(image, name, (left, y), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 255, 0), 2)
            i += 1
            cv2.imwrite('detectedFaces/' + name + str(i) + '.jpg', image)

    print("Done detecting faces")
    return True

detectFaces1(0.5)
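On tolerance: compare_faces is essentially a threshold over the Euclidean distance between 128-d face encodings, which you can compute directly with face_recognition.face_distance. A minimal sketch, reusing the encodings1.pickle file loaded above:

import pickle
import face_recognition

# load the encodings produced by the encoding script below
data = pickle.loads(open("encodings1.pickle", "rb").read())

# compare one known encoding against the whole set
probe = data["encodings"][0]
distances = face_recognition.face_distance(data["encodings"], probe)

tolerance = 0.4  # the value that worked best here; the library default is 0.6
matches = [d <= tolerance for d in distances]  # what compare_faces returns
print(sum(matches), "of", len(matches), "encodings within tolerance")

The encodings file itself is built by the following script.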
# import the necessary packages
from imutils import paths
import face_recognition
import argparse
import pickle
import cv2
import os

# parse arguments
ap = argparse.ArgumentParser()
ap.add_argument('-i', '--dataset', required=True, help='path to input directory of faces + images')
ap.add_argument('-e', '--encodings', required=True, help='path to serialized db of facial encodings')
ap.add_argument('-d', '--detection-method', type=str, default='cnn', help='face detection model to use: either `hog` or `cnn`')
args = vars(ap.parse_args())

# grab the paths to the input images in our dataset
print('[INFO] quantifying faces...')
imagePaths = list(paths.list_images(args['dataset']))

# initialize the list of known encodings and known names
knownEncodings = []
knownNames = []

# loop over the image paths
for (i, imagePath) in enumerate(imagePaths):
    # extract the person name from the image path
    print('[INFO] processing image {}/{}'.format(i + 1, len(imagePaths)))
    name = imagePath.split(os.path.sep)[-2]

    # load the input image and convert it from BGR (OpenCV ordering)
    # to dlib ordering (RGB)
    image = cv2.imread(imagePath)
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # detect the (x, y)-coordinates of the bounding boxes
    # corresponding to each face in the input image
    boxes = face_recognition.face_locations(rgb, model=args['detection_method'])

    # compute the facial embedding for each face
    encodings = face_recognition.face_encodings(rgb, boxes)

    # loop over the encodings
    for encoding in encodings:
        # add each encoding + name to our set of known names and encodings
        knownEncodings.append(encoding)
        knownNames.append(name)

# dump the facial encodings + names to disk
print('[INFO] serializing encodings...')
data = {'encodings': knownEncodings, 'names': knownNames}
f = open(args['encodings'], 'wb')
f.write(pickle.dumps(data))
f.close()
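Because the person name is taken from the second-to-last path component (imagePath.split(os.path.sep)[-2]), the dataset is expected to have one folder per person, e.g. a hypothetical layout:

dataset/
    noku/
        face01.jpg
        face02.jpg
    someone_else/
        face03.jpg

and a sample invocation (script name assumed):

python encode_faces.py --dataset dataset --encodings encodings1.pickle --detection-method hog

Last, the OCR experiment for step 8, reading the subtitle region of a saved frame.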
%matplotlib inline
import cv2
import pytesseract
from matplotlib import pyplot as plt

# load a saved frame and isolate the subtitle region
img = cv2.imread('1250_frame.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#gray = cv2.threshold(img, 125, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C)
gray = cv2.threshold(gray[200:300, 50:500], 200, 255, cv2.THRESH_BINARY)[1]
#gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
#gray = cv2.medianBlur(img, 1)
#gray = cv2.GaussianBlur(gray, (21, 21), 0)
#gray = cv2.Canny(gray, 15, 15)
# I don't know what I am doing here!
plt.imshow(gray)
plt.show()

# I used pytesseract to read the text; you can install it with pip
print(pytesseract.image_to_string(gray))
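Step 8 of the list is truncated, but the snippet above suggests the goal is finding frames whose subtitle reads `Hie`. A sketch under that assumption, reusing the crop and threshold from the experiment (the video path and sampling rate are illustrative):

import cv2
import pytesseract

stream = cv2.VideoCapture('input.mp4')  # hypothetical input video
frame_no = 0
while True:
    (grabbed, frame) = stream.read()
    if not grabbed:
        break
    # sample roughly one frame per second of 25 fps video
    if frame_no % 25 == 0:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # same subtitle region and threshold as the experiment above
        region = cv2.threshold(gray[200:300, 50:500], 200, 255, cv2.THRESH_BINARY)[1]
        if 'Hie' in pytesseract.image_to_string(region):
            print('possible `Hie` subtitle at frame', frame_no)
    frame_no += 1
stream.release()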