Skip to content

Instantly share code, notes, and snippets.

@akash-ch2812
Last active January 17, 2024 06:11
Show Gist options
  • Save akash-ch2812/d42acf86e4d6562819cf4cd37d1195e7 to your computer and use it in GitHub Desktop.
Save akash-ch2812/d42acf86e4d6562819cf4cd37d1195e7 to your computer and use it in GitHub Desktop.
Python code for marking regions of interest in an image for OCR
# use this command to install open cv2
# pip install opencv-python
# use this command to install PIL
# pip install Pillow
import cv2
from PIL import Image
def mark_region(image_path):
    """Mark candidate text regions on an image and return their coordinates.

    The image is read with OpenCV, converted to grayscale, blurred,
    adaptively thresholded (inverted) and dilated so that neighbouring
    text merges into larger blobs. The bounding box of every external
    contour that passes the position/area filters below is drawn on the
    image and recorded.

    NOTE(review): the pixel limits (600, 1000, 2400, 2000, 2200) and the
    minimum area (10000) are hard-coded; presumably they match one
    specific page layout and need adjusting for other documents.

    Args:
        image_path: Path of the image file to load. (The original
            signature misspelled this as ``imagE_path`` while the body
            read ``image_path``, which raised ``NameError``.)

    Returns:
        A tuple ``(image, line_items_coordinates)``: the loaded image with
        the rectangles drawn on it, and a list of
        ``[(x, y), (2200, y + h)]`` corner pairs, one per drawn rectangle.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    im = cv2.imread(image_path)
    if im is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError("Could not read image: %r" % (image_path,))

    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs.
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the contour list in either case.
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, _, h = cv2.boundingRect(c)

        if y >= 600 and x <= 1000 and area > 10000:
            im = cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

        # Second, independent check (kept as in the original): a contour
        # that satisfies both conditions is recorded twice.
        if y >= 2400 and x <= 2000:
            im = cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    # Return the working image itself so the result is defined even when no
    # contour passed the filters (the original returned an unbound name).
    return im, line_items_coordinates
@doughnet
Copy link

doughnet commented Feb 8, 2021

where in this code is it specifying the image path folder?

@jlumbroso
Copy link

This code is very useful and provided by the author, @akash-ch2812, in the context of a terrific Medium article.

There are a few quirks though:

  • imagE_path is misspelled in line 10, it should be image_path.
  • The variable name image is used instead of im in the loop: so the returned image only has the final region. The variable name should be consistently im or image through all occurrences so that the returned image contains all the regions.
  • This code hard-codes assumptions on the input image in lines 31-38 that will not work for generic image files.

Here is a version that addresses these problems:

# use this command to install open cv2
# pip install opencv-python

import cv2

def mark_region(image_path, threshold_region_ignore=40):
    """Draw boxes around text-like regions of an image and return them.

    The image is converted to grayscale, blurred, adaptively thresholded
    (inverted) and dilated so that neighbouring text merges into blobs;
    the bounding box of each sufficiently large external contour is then
    drawn on the image and recorded.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.
        threshold_region_ignore: Minimum width and height, in pixels, a
            bounding box must have to be kept. Defaults to 40.

    Returns:
        A tuple ``(image, line_items_coordinates)``: the loaded image with
        a rectangle drawn around each kept region, and a list of
        ``[(x, y), (x + w, y + h)]`` corner pairs.

    Raises:
        OSError: If ``cv2.imread`` cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError("Could not read image: %r" % (image_path,))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs.
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the contour list in either case.
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)

        # Skip boxes that are too narrow or too short to be of interest.
        if w < threshold_region_ignore or h < threshold_region_ignore:
            continue

        image = cv2.rectangle(image, (x, y), (x + w, y + h), color=(255, 0, 255), thickness=3)
        line_items_coordinates.append([(x, y), (x + w, y + h)])

    return image, line_items_coordinates

The original code also does not show how to use it; here is a usage example:

# use this command to install open cv2
# pip install opencv-python

# use this command to install matplotlib
# pip install matplotlib

import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

# Path of the image to analyse.
FILENAME = "path/to/some_image.jpg"
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV images use BGR channel order while matplotlib expects RGB, so
# convert before displaying to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

Hope this answers your question @doughnet!

@doughnet
Copy link

doughnet commented Mar 6, 2021

This code is very useful and provided by the author, @akash-ch2812, in the context of a terrific Medium article.

There are a few quirks though:

  • imagE_path is misspelled in line 10, it should be image_path.
  • The variable name image is used instead of im in the loop: so the returned image only has the final region. The variable name should be consistently im or image through all occurrences so that the returned image contains all the regions.
  • This code hard-codes assumptions on the input image in lines 31-38 that will not work for generic image files.

Here is a version that addresses these problems:

# use this command to install open cv2
# pip install opencv-python

import cv2

def mark_region(image_path, threshold_region_ignore=40):
    """Draw boxes around text-like regions of an image and return them.

    The image is converted to grayscale, blurred, adaptively thresholded
    (inverted) and dilated so that neighbouring text merges into blobs;
    the bounding box of each sufficiently large external contour is then
    drawn on the image and recorded.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.
        threshold_region_ignore: Minimum width and height, in pixels, a
            bounding box must have to be kept. Defaults to 40.

    Returns:
        A tuple ``(image, line_items_coordinates)``: the loaded image with
        a rectangle drawn around each kept region, and a list of
        ``[(x, y), (x + w, y + h)]`` corner pairs.

    Raises:
        OSError: If ``cv2.imread`` cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError("Could not read image: %r" % (image_path,))

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs.
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the contour list in either case.
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        x, y, w, h = cv2.boundingRect(c)

        # Skip boxes that are too narrow or too short to be of interest.
        if w < threshold_region_ignore or h < threshold_region_ignore:
            continue

        image = cv2.rectangle(image, (x, y), (x + w, y + h), color=(255, 0, 255), thickness=3)
        line_items_coordinates.append([(x, y), (x + w, y + h)])

    return image, line_items_coordinates

The original code also does not show how to use it; here is a usage example:

# use this command to install open cv2
# pip install opencv-python

# use this command to install matplotlib
# pip install matplotlib

import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

# Path of the image to analyse.
FILENAME = "path/to/some_image.jpg"
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV images use BGR channel order while matplotlib expects RGB, so
# convert before displaying to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

Hope this answers your question @doughnet!

Thank you. I’ll give this another go. Of course a couple days ago my OCR development box got wiped so I’ll start from fresh and hopefully have more thank. Thank you again for the help.

@doughnet
Copy link

doughnet commented Mar 7, 2021

Hope this answers your question @doughnet!

Tried it this evening but it does nothing still with the updated code you provided. I don't understand how running: python3 Marking_ROI.py gives it enough information to specify what folder or image to process. Is it looking for images in the same path or which path? What file names is it looking for? Are all the scripts supposed to be run from another script and not run individually? There is such a lack of information provided in the original Medium article ... doesn't help when there are variable errors like imagE_path ....

Would you mind giving step by step with proper details? Should be super simple to follow instructions when they are properly documented. Another error in the Medium article is the very first pdf2image script; nowhere does it mention to change the "provide path to pdf file" to your filename; so simple to just state in the Medium article to replace this and that to get it working instead of wasting user's time.

@jlumbroso
Copy link

@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.

Going back to your question: This is not a script that you can just use externally. You cannot just call python3 Marking_ROI.py to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post, the mark_region() code can be used in the following way:

# use this command to install open cv2
# pip install opencv-python

# use this command to install matplotlib
# pip install matplotlib

import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV images use BGR channel order while matplotlib expects RGB, so
# convert before displaying to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.savefig("image-with-regions.png") # <--- added this to output an image

where obviously you have to edit FILENAME to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).

@doughnet
Copy link

doughnet commented Mar 7, 2021

@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.

Going back to your question: This is not a script that you can just use externally. You cannot just call python3 Marking_ROI.py to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post the mark_regions() code can be used the following way:

# use this command to install open cv2
# pip install opencv-python

# use this command to install matplotlib
# pip install matplotlib

import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV images use BGR channel order while matplotlib expects RGB, so
# convert before displaying to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.savefig("image-with-regions.png") # <--- added this to output an image

where obviously you have to edit FILENAME to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).

Of course thank you for the time spent on “improving” and “correcting” the original author’s scripts. The way I work and pretty everyone in my field is if you’re going to do the work; do it well.

“improved” details would be helpful to know what someone is getting them self into. A simple one line stating to expect modifying the code because it is not complete would help users (or myself) know that it needs to be modified to work.

I’ll try this out again; see if it works.

EDIT: The additional details ended up working. Thank you for adding the few lines of comments and details to have it work properly.

@jlumbroso
Copy link

EDIT: The additional details ended up working. Thank you for adding the few lines of comments and details to have it work properly.

You're welcome.

@akash-ch2812
Copy link
Author

@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.
Going back to your question: This is not a script that you can just use externally. You cannot just call python3 Marking_ROI.py to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post the mark_regions() code can be used the following way:

# use this command to install open cv2
# pip install opencv-python

# use this command to install matplotlib
# pip install matplotlib

import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV images use BGR channel order while matplotlib expects RGB, so
# convert before displaying to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.savefig("image-with-regions.png") # <--- added this to output an image

where obviously you have to edit FILENAME to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).

Of course thank you for the time spent on “improving” and “correcting” the original author’s scripts. The way I work and pretty everyone in my field is if you’re going to do the work; do it well.

“improved” details would be helpful to know what someone is getting them self into. A simple one line stating to expect modifying the code because it is not complete would help users (or myself) know that it needs to be modified to work.

I’ll try this out again; see if it works.

EDIT: The additional details ended up working. Thank you for adding the few lines of comments and details to have it work properly.

Thank you for the constructive feedback. My article on Medium was particularly written for users exploring the document intelligence domain. As mentioned by @jlumbroso, it was just an introduction to a concept which I thought might be worth sharing with the community. I don't know about your ways of working, but for me it's all about getting the concept first and then jumping into the code.

The gists here on Github were specifically made as a supporting material for the original article. Again @jlumbroso thank you for helping out with the spell checks and typos.

Hope this answers your questions about the article intent and typos in code.

@akash-ch2812
Copy link
Author

@doughnet I am happy to help you, but I would rather you be thankful both of the original poster's work and my response to you — rather than imply that our instructions are not good enough. They may not be complete enough for you, but as you can see, as a programmer with no prior experience in OpenCV the original poster's article was sufficient for me to understand how to work with it — despite a few typos. Of course explanations can always be improved, but the help you're getting is free.

Going back to your question: This is not a script that you can just use externally. You cannot just call python3 Marking_ROI.py to get what you want. You need to include the code in some larger context to get it to work. As I explained in my post the mark_regions() code can be used the following way:

# use this command to install open cv2
# pip install opencv-python

# use this command to install matplotlib
# pip install matplotlib

import cv2
import matplotlib.pyplot as plt

# [... define the mark_region method ...]

FILENAME = "path/to/some_image.jpg" # <--- change this to be the file you want
image, line_items_coordinates = mark_region(FILENAME)
plt.figure(figsize=(20,20))
# OpenCV images use BGR channel order while matplotlib expects RGB, so
# convert before displaying to avoid swapped red/blue channels.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
plt.savefig("image-with-regions.png") # <--- added this to output an image

where obviously you have to edit FILENAME to have it point to the file that you want. If you would like to create a command-line tool that can run this on any input image, that is something you can do (fairly easily).

@jlumbroso - Thank you for the kind words 👍

@Matthew-Hiebing
Copy link

Matthew-Hiebing commented Jul 13, 2021

Dear @akash-ch2812 or @jlumbroso,

I'm having some trouble using this code on some PDFs I'm working on, I thought either you might have some ideas on what's going wrong.

I have a single PDF called, "DNR_WFH.pdf". I was able to use the first section of code below to split the PDF into separate pages and name them accordingly.

from pdf2image import convert_from_path

# Source PDF that gets split into one JPEG per page.
pdfs = r"C:\Users\mhiebing\Desktop\DNR_WFH.pdf"
# 350 is passed as the second positional argument
# (presumably the rendering DPI - confirm against the pdf2image docs).
pages = convert_from_path(pdfs, 350)

# Write each page to the current directory as Page_<n>.jpg, counting from 1.
for i, page in enumerate(pages, start=1):
    image_name = "Page_" + str(i) + ".jpg"
    page.save(image_name, "JPEG")

For the second section, I'm only looking at the first JPEG to keep things simple. When I call the mark_region(image_path) function nothing happens. Is there supposed to be a window where I outline the boxes we want to extract text from? Here's what I have for the second section:

import cv2
import matplotlib.pyplot as plt


def mark_region(image_path):

    #define the mark_region method
    # NOTE(review): the five usage lines below sit inside the function body.
    # The first executable statement that does any work is a call to
    # mark_region itself, so every call recurses before reaching the image
    # processing further down. These lines belong at module level, after
    # the function definition.
    FILENAME = r"C:\Users\mhiebing\Documents\GitHub_Repos\MonthlyStatsExtract\Page_1.jpg" # <--- change this to be the file you want
    image, line_items_coordinates = mark_region(FILENAME)
    plt.figure(figsize=(20,20))
    plt.imshow(image)
    plt.savefig("image-with-regions.png") # <--- added this to output an image

    # Load the image to analyse from the path given by the caller.
    image = cv2.imread(image_path)

    # define threshold of regions to ignore
    THRESHOLD_REGION_IGNORE = 40

    # Grayscale -> blur -> inverted adaptive threshold.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9,9), 0)
    thresh = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,11,30)

    # Dilate to combine adjacent text contours
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # Find contours, highlight text areas, and extract ROIs
    cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # select the contour list in either case.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]

    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)  # computed but not used below
        x, y, w, h = cv2.boundingRect(c)

        # Skip bounding boxes narrower or shorter than the threshold.
        if w < THRESHOLD_REGION_IGNORE or h < THRESHOLD_REGION_IGNORE:
            continue

        # Draw the box on the image and record its two corner points.
        image = cv2.rectangle(image, (x,y), (x+w, y+h), color=(255,0,255), thickness=3)
        line_items_coordinates.append([(x,y), (x+w, y+h)])

    return image, line_items_coordinates

# NOTE(review): a bare name is only an expression; it does not call the
# function, so running the script executes none of the code above.
mark_region

Thank you for putting up the article and supplying the code!

@squeezer44
Copy link

@Matthew-Hiebing
I'm just trying to adapt the existing fantastic code and I found your question.

This code snippet of your code is inside the function mark_region but should be outside. This because you are calling the function mark_region(FILENAME) and pass a FILENAME:

    #define the mark_region method
    FILENAME = r"C:\Users\mhiebing\Documents\GitHub_Repos\MonthlyStatsExtract\Page_1.jpg" # <--- change this to be the file you want
    image, line_items_coordinates = mark_region(FILENAME)
    plt.figure(figsize=(20,20))
    plt.imshow(image)
    plt.savefig("image-with-regions.png") # <--- added this to output an image

@RonyMacfly
Copy link

Thank you so much for your work.
There were problems with opencv-python-4.5.5.62.
Use opencv-python==4.1.2.30.

@hasnentai
Copy link

if w < THRESHOLD_REGION_IGNORE or h < THRESHOLD_REGION_IGNORE:
            continue
        
        image = cv2.rectangle(image, (x,y), (x+w, y+h), color=(255,0,255), thickness=3)
        line_items_coordinates.append([(x,y), (x+w, y+h)])

Thanks a lot man, Before I was only getting few area marked but after revamp and using your code it's marking every single area
Thank you once again. Keep up the good work

@aysnrdurak
Copy link

aysnrdurak commented Mar 28, 2023

if we want to optimize this code:

import cv2
from PIL import Image
from pdf2image import convert_from_path

def mark_region(im):
    """Mark candidate text regions on an already-loaded image.

    The image is converted to grayscale, blurred, adaptively thresholded
    (inverted) and dilated so that neighbouring text merges into blobs.
    The bounding box of every external contour that passes the
    position/area filters below is drawn on ``im`` and recorded.

    NOTE(review): the pixel limits (600, 1000, 2400, 2000, 2200) and the
    minimum area (10000) are hard-coded; presumably they match one
    specific page layout and need adjusting for other documents.

    Args:
        im: Image array as returned by ``cv2.imread`` (converted here with
            ``cv2.COLOR_BGR2GRAY``, so a BGR colour image is expected).

    Returns:
        A tuple ``(image, line_items_coordinates)``: ``im`` with the
        rectangles drawn on it, and a list of ``[(x, y), (2200, y + h)]``
        corner pairs, one per drawn rectangle.
    """
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.adaptiveThreshold(
        blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 30
    )

    # Dilate to combine adjacent text contours.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 9))
    dilate = cv2.dilate(thresh, kernel, iterations=4)

    # findContours returns 2 or 3 values depending on the OpenCV version;
    # indexing [0] unconditionally yields the image, not the contours, on
    # the 3-value variant, so select by tuple length instead.
    found = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]

    line_items_coordinates = []
    for c in cnts:
        area = cv2.contourArea(c)
        x, y, _, h = cv2.boundingRect(c)

        # Both original branches performed the same action, so they are
        # merged into a single condition.
        is_large_body_region = y >= 600 and x <= 1000 and area > 10000
        is_bottom_region = y >= 2400 and x <= 2000
        if is_large_body_region or is_bottom_region:
            im = cv2.rectangle(im, (x, y), (2200, y + h), color=(255, 0, 255), thickness=3)
            line_items_coordinates.append([(x, y), (2200, y + h)])

    # Return the working image itself so the result is defined even when no
    # contour passed the filters (the original returned an unbound name).
    return im, line_items_coordinates

# Placeholder: replace with the path of the poppler "bin" folder.
# (The original had a space between the r prefix and the quote, which is a
# SyntaxError.)
poppler_path = r'you should write poppler bin folder path '
# Placeholder: replace the first argument with the path of the PDF to convert.
pages = convert_from_path("you should write here pdf path", 480, poppler_path=poppler_path)

# Save every page as a JPEG, mark its regions and write the marked copy
# next to it with a "Marked_" prefix.
for i, page in enumerate(pages):
    image_name = f"Deneme_{i}.JPEG"
    page.save(image_name, "JPEG")
    im = cv2.imread(image_name)
    marked_image, coordinates = mark_region(im)
    cv2.imwrite(f"Marked_{image_name}", marked_image)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment