greg-randall/gist:009817414b928051da96738b0bd15273

## gistfile1.txt
import cv2
import numpy as np
import pytesseract

# Split a composite image of headshots into one PNG per person, using OCR on
# the strip of pixels below each photograph to recover that person's name.

# --- Settings to tweak for your own image ---
# Path of the composite image to process.
SOURCE_IMAGE_PATH = '2021.jpg'
# Gray levels above this are treated as background. This will need to be
# played with to deal with backgrounds that aren't solid white.
BACKGROUND_THRESHOLD = 240
# Minimum bounding-box area (w*h, in pixels) for a contour to be treated as a
# headshot. If you're not getting anything, reduce this size.
MIN_HEADSHOT_AREA = 50000
# How many pixels BELOW the photograph to collect when looking for the name.
NAME_STRIP_HEIGHT = 150
# How far to the left and right of the photograph to extend the name strip,
# in case the names are wider than the picture.
NAME_STRIP_MARGIN = 66

# Load the image
image = cv2.imread(SOURCE_IMAGE_PATH)
if image is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising, so fail here with a clear message.
    raise FileNotFoundError(f"Could not read image file: {SOURCE_IMAGE_PATH}")

# Convert the image to gray scale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Threshold the image to get the boundaries of the smaller images:
# everything darker than the background becomes foreground (255).
_, thresh = cv2.threshold(gray, BACKGROUND_THRESHOLD, 255, cv2.THRESH_BINARY_INV)

# Find the outer contours in the thresholded image. Indexing with [-2] picks
# the contour list whether findContours returns two values (OpenCV 2.x/4.x)
# or three (OpenCV 3.x).
contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

for i, contour in enumerate(contours):
    # Get the bounding rectangle of the contour
    x, y, w, h = cv2.boundingRect(contour)

    # Make sure the region is big enough to be a headshot.
    if w * h <= MIN_HEADSHOT_AREA:
        continue

    # Select the strip directly below the photograph where the name is
    # expected. The left edge is clamped to 0 because a negative start index
    # would be read by NumPy as "count from the right edge" and yield an
    # empty or wrong crop. Ends past the image are clipped by slicing itself.
    name_left = max(0, x - NAME_STRIP_MARGIN)
    name_right = x + w + NAME_STRIP_MARGIN
    name_top = y + h
    name_bottom = name_top + NAME_STRIP_HEIGHT
    cropped_image_name = image[name_top:name_bottom, name_left:name_right]

    # A photograph touching the bottom edge leaves no room for a name;
    # skip it rather than hand an empty array to the OCR engine.
    if cropped_image_name.size == 0:
        continue

    # Use OCR to recognize the name from the image
    name = pytesseract.image_to_string(cropped_image_name).title().strip()

    # Make sure we actually got a name.
    if not name:
        continue

    print(name)

    # Crop the photograph itself from the image
    cropped_image_headshot = image[y:y + h, x:x + w]

    # Build a filename-safe form of the name: whitespace runs (including
    # newlines from multi-line OCR output) become single underscores, and
    # any character that is not alphanumeric or one of - _ . ' is replaced
    # so path separators and similar symbols cannot end up in the filename.
    safe_name = "".join(
        ch if ch.isalnum() or ch in "-_.'" else "_"
        for ch in "_".join(name.split())
    )

    # Save the cropped images: the headshot (index + name) and the name
    # strip that was OCR'd (index only), handy for checking the OCR result.
    cv2.imwrite(f"{i}_{safe_name}.png", cropped_image_headshot)
    cv2.imwrite(f'{i}.png', cropped_image_name)
	import cv2
	import numpy as np
	import pytesseract

	# Split a composite image of headshots into one PNG per person, using OCR on
	# the strip of pixels below each photograph to recover that person's name.
	# NOTE(review): this tab-indented section repeats the script that appears
	# earlier in the file - confirm whether both copies are needed.

	# --- Settings to tweak for your own image ---
	# Path of the composite image to process.
	SOURCE_IMAGE_PATH = '2021.jpg'
	# Gray levels above this are treated as background. This will need to be
	# played with to deal with backgrounds that aren't solid white.
	BACKGROUND_THRESHOLD = 240
	# Minimum bounding-box area (w*h, in pixels) for a contour to be treated as a
	# headshot. If you're not getting anything, reduce this size.
	MIN_HEADSHOT_AREA = 50000
	# How many pixels BELOW the photograph to collect when looking for the name.
	NAME_STRIP_HEIGHT = 150
	# How far to the left and right of the photograph to extend the name strip,
	# in case the names are wider than the picture.
	NAME_STRIP_MARGIN = 66

	# Load the image
	image = cv2.imread(SOURCE_IMAGE_PATH)
	if image is None:
		# cv2.imread reports a missing or unreadable file by returning None
		# instead of raising, so fail here with a clear message.
		raise FileNotFoundError(f"Could not read image file: {SOURCE_IMAGE_PATH}")

	# Convert the image to gray scale
	gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

	# Threshold the image to get the boundaries of the smaller images:
	# everything darker than the background becomes foreground (255).
	_, thresh = cv2.threshold(gray, BACKGROUND_THRESHOLD, 255, cv2.THRESH_BINARY_INV)

	# Find the outer contours in the thresholded image. Indexing with [-2] picks
	# the contour list whether findContours returns two values (OpenCV 2.x/4.x)
	# or three (OpenCV 3.x).
	contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

	for i, contour in enumerate(contours):
		# Get the bounding rectangle of the contour
		x, y, w, h = cv2.boundingRect(contour)

		# Make sure the region is big enough to be a headshot.
		if w * h <= MIN_HEADSHOT_AREA:
			continue

		# Select the strip directly below the photograph where the name is
		# expected. The left edge is clamped to 0 because a negative start index
		# would be read by NumPy as "count from the right edge" and yield an
		# empty or wrong crop. Ends past the image are clipped by slicing itself.
		name_left = max(0, x - NAME_STRIP_MARGIN)
		name_right = x + w + NAME_STRIP_MARGIN
		name_top = y + h
		name_bottom = name_top + NAME_STRIP_HEIGHT
		cropped_image_name = image[name_top:name_bottom, name_left:name_right]

		# A photograph touching the bottom edge leaves no room for a name;
		# skip it rather than hand an empty array to the OCR engine.
		if cropped_image_name.size == 0:
			continue

		# Use OCR to recognize the name from the image
		name = pytesseract.image_to_string(cropped_image_name).title().strip()

		# Make sure we actually got a name.
		if not name:
			continue

		print(name)

		# Crop the photograph itself from the image
		cropped_image_headshot = image[y:y + h, x:x + w]

		# Build a filename-safe form of the name: whitespace runs (including
		# newlines from multi-line OCR output) become single underscores, and
		# any character that is not alphanumeric or one of - _ . ' is replaced
		# so path separators and similar symbols cannot end up in the filename.
		safe_name = "".join(
			ch if ch.isalnum() or ch in "-_.'" else "_"
			for ch in "_".join(name.split())
		)

		# Save the cropped images: the headshot (index + name) and the name
		# strip that was OCR'd (index only), handy for checking the OCR result.
		cv2.imwrite(f"{i}_{safe_name}.png", cropped_image_headshot)
		cv2.imwrite(f'{i}.png', cropped_image_name)