Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save knight76/e1a11e9e05391a78c5cf36f42d1f7053 to your computer and use it in GitHub Desktop.
Save knight76/e1a11e9e05391a78c5cf36f42d1f7053 to your computer and use it in GitHub Desktop.
Coursera-Python-Project-pillow-tesseract-opencv(3)
import math
import zipfile
from PIL import Image, ImageOps, ImageDraw
import pytesseract
import cv2 as cv
import numpy as np
face_cascade = cv.CascadeClassifier('readonly/haarcascade_frontalface_default.xml')
parsed_img_src = {}
with zipfile.ZipFile('readonly/images.zip', 'r') as archive:
for entry in archive.infolist():
with archive.open(entry) as file:
img = Image.open(file).convert('RGB')
parsed_img_src[entry.filename] = {'pil_img':img}
for img_name in parsed_img_src.keys():
open_cv_image = np.array(parsed_img_src[img_name]['pil_img'])
img_g = cv.cvtColor(open_cv_image, cv.COLOR_BGR2GRAY)
faces_bounding_boxes = face_cascade.detectMultiScale(img_g, 1.3, 5)
parsed_img_src[img_name]['faces'] = []
for x,y,w,h in faces_bounding_boxes:
face = parsed_img_src[img_name]['pil_img'].crop((x,y,x+w,y+h))
parsed_img_src[img_name]['faces'].append(face)
for img_name in parsed_img_src.keys():
for face in parsed_img_src[img_name]['faces']:
face.thumbnail((100,100),Image.ANTIALIAS)
#search the keyword in every page's text and return the faces
def search(keyword):
for img_name in parsed_img_src:
if (keyword in parsed_img_src[img_name]['text']):
if(len(parsed_img_src[img_name]['faces']) != 0):
print("Result found in file {}".format(img_name))
h = math.ceil(len(parsed_img_src[img_name]['faces'])/5)
contact_sheet=Image.new('RGB',(500, 100*h))
xc = 0
yc = 0
for img in parsed_img_src[img_name]['faces']:
contact_sheet.paste(img, (xc, yc))
if xc + 100 == contact_sheet.width:
xc = 0
yc += 100
else:
xc += 100
display(contact_sheet)
else:
print("Result found in file {} \nBut there were no faces in that file\n\n".format(img_name))
return
# search #1
search('Christopher')
# search #2
search('Mark')
# search #3
search('pizza')
@knight76
Copy link
Author

result1

result2

result3

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment