Skip to content

Instantly share code, notes, and snippets.

@poojapi
Created November 9, 2020 09:58
Show Gist options
  • Save poojapi/7e2b0028933849566671e71a99143c80 to your computer and use it in GitHub Desktop.
Save poojapi/7e2b0028933849566671e71a99143c80 to your computer and use it in GitHub Desktop.
Python detect paragraphs with opencv
import cv2
import numpy as np
def para_detect(file_name, output_path='output4.png', max_regions=2):
    """Outline the largest text regions of an image and save the result.

    Pixels with a grey level of 180 or below (dark text on a light page) are
    turned into white foreground, the foreground is dilated so neighbouring
    characters and lines merge into larger blobs, and the bounding boxes of
    the blobs with the largest box area are drawn onto the image, which is
    then written to disk.

    Args:
        file_name: Path of the image to analyse.
        output_path: Path the annotated image is written to.
        max_regions: Maximum number of regions (largest first) to outline.

    Returns:
        A list of ``(x, y, w, h)`` bounding boxes, largest area first, one
        for every region that was outlined.

    Raises:
        OSError: If the input image cannot be read or the annotated image
            cannot be written.
    """
    img = cv2.imread(file_name)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('could not read image: {!r}'.format(file_name))

    img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Keep only pixels brighter than 180, then invert-threshold so that
    # everything at or below 180 (the dark text) becomes white foreground.
    _, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
    image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
    _, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY_INV)

    # The kernel shape steers the dilation: a larger x grows blobs more
    # horizontally, a larger y more vertically. More iterations dilate more.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (10, 3))
    dilated = cv2.dilate(new_img, kernel, iterations=5)

    # Contours are taken from the dilated mask itself: findContours treats
    # non-zero pixels as foreground, so the mask must not be inverted first.
    # Indexing with [-2] works for both the two-value and the three-value
    # return signatures used by different OpenCV releases.
    contours = cv2.findContours(
        dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Rank regions by bounding-box area and keep the largest ones.
    boxes = sorted(
        (tuple(cv2.boundingRect(contour)) for contour in contours),
        key=lambda box: box[2] * box[3],
        reverse=True,
    )[:max(0, max_regions)]

    for x, y, w, h in boxes:
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)

    # Write the original image with the added rectangles to disk.
    if not cv2.imwrite(output_path, img):
        raise OSError('could not write image: {!r}'.format(output_path))

    return boxes
# Run the detection on the sample image only when executed as a script,
# so importing this module does not trigger image processing.
if __name__ == '__main__':
    file_name = 'n.png'
    para_detect(file_name)
@madboulyt
Copy link

contours is not defined.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment