# deshwalmahesh/generate_yolo_annot.py

## generate_yolo_annot.py
import glob
import os

import cv2
import numpy as np
from PIL import Image

def select_box(results:np.ndarray,method:str)->int:
    '''
    Pick one Bounding Box out of a detection result, by largest area or by highest score
    args:
        results: Detection output as a 3 item sequence (classes, scores, boxes). Each box is indexed as [x, y, w, h]
        method: 'area' picks the box with the largest w*h. Any other value picks the box with the highest score
    out:
        Index (as returned by np.argmax) of the selected bounding box
    '''
    _, confidences, candidates = results

    # Everything that is not 'area' falls back to the score based choice
    if method != 'area':
        return np.argmax(confidences)

    areas = [candidate[2] * candidate[3] for candidate in candidates]
    return np.argmax(areas)


def bnd_box_to_yolo_line(box:np.ndarray,img_size:np.ndarray)->tuple:
    '''
    Convert a pixel Bounding Box into the relative values used in a YOLO text line
    args:
        box: 1-D sequence describing the box as [x_min, y_min, w, h] in pixels
        img_size: Image size in numpy ``shape`` order. Index 0 is used to scale the y values and index 1 to scale the x values
    out:
        Tuple (x_center, y_center, w, h) of floats, each divided by the matching image dimension
    '''
    left, top, box_w, box_h = box[0], box[1], box[2], box[3]
    right = left + box_w
    bottom = top + box_h

    # img_size[0] scales the vertical axis, img_size[1] the horizontal one
    img_h, img_w = img_size[0], img_size[1]

    rel_x_center = float(left + right) / 2 / img_w
    rel_y_center = float(top + bottom) / 2 / img_h
    rel_w = float(right - left) / img_w
    rel_h = float(bottom - top) / img_h

    return rel_x_center, rel_y_center, rel_w, rel_h

def yolo_to_bb(annotations:[tuple,list,np.ndarray], img_size:tuple, return_wh:bool = True):
    '''
    Change the YoloV4 Darknet annotations to Bounding Box Format
    args:
        annotations: Darknet annotation given as (xc, yc, w, h), all relative to the image size, as stored in the .txt file. Exclude the class id
        img_size: Size of the original image in numpy ``shape`` order, i.e. (height, width[, channels])
        return_wh: Whether to return Width, Height (True) or Max_X, Max_Y (False) as the last two values
    out:
        Tuple of ints describing the box as (x_min, y_min, w, h) when return_wh is True, else (x_min, y_min, x_max, y_max)
    '''
    (xc, yc, w, h) = annotations
    img_h, img_w = img_size[0], img_size[1]

    # The max edge is the min edge mirrored around the centre: max = 2*centre - min
    x_min = (xc * img_w) - ((w * img_w) / 2)
    x_max = (xc * img_w * 2) - x_min

    y_min = (yc * img_h) - ((h * img_h) / 2)
    y_max = (yc * img_h * 2) - y_min

    if return_wh:
        return int(x_min), int(y_min), int(x_max) - int(x_min), int(y_max) - int(y_min)

    return int(x_min), int(y_min), int(x_max), int(y_max)


def build_yolo_model(weight_file_path:str,config_file_path:str,size:tuple=(416,416)):
    '''
    Load a Yolo network from disk and wrap it in an OpenCV detection model
    args:
        weight_file_path: Path to the .weights (Yolo v3,v4 etc) file
        config_file_path: Path to the .cfg file
        size: Input size handed to the model. You can pass in a multiple of 32. Works even when you have trained with 416 and are now testing on 608
    out:
        cv2.dnn_DetectionModel configured with the given input size and a 1/255 pixel scale
    '''
    detector = cv2.dnn_DetectionModel(cv2.dnn.readNet(weight_file_path, config_file_path))
    detector.setInputParams(size=size, scale=1/255.)
    return detector

def generate_text_annotation(dir_path:str,weight_file_path:str,config_file_path:str,size:tuple=(416,416),CONFIDENCE_THRESHOLD:float=0.51, NMS_THRESHOLD:float=0.51)->None:
    '''
    Generate one YOLO annotation .txt file per .png image in a directory, using the Bounding Box found by the model.
    For every image with at least one detection, the single highest scoring box is written next to the image as
    "0 x_center y_center w h" (the class id is always 0). Images without a detection get no file.
    A classes.txt file is written into dir_path once all images are processed.
    args:
        dir_path: Directory where your images are stored (./dir/whatever/). Only .png files are picked up. A trailing separator is optional
        weight_file_path: Path to the .weights (Yolo v3,v4 etc) file
        config_file_path: Path to the .cfg file
        size: Input size handed to the model. You can pass in a multiple of 32. Works even when you have trained with 416 and are now testing on 608
        CONFIDENCE_THRESHOLD: Confidence threshold passed to model.detect
        NMS_THRESHOLD: Non Maximum Suppression threshold passed to model.detect
    '''
    model = build_yolo_model(weight_file_path, config_file_path, size)

    for image_path in glob.glob(os.path.join(dir_path, '*.png')):
        # splitext strips only the final extension, so other dots in the path do not matter
        text_file_name = os.path.splitext(image_path)[0] + '.txt'

        # PNG files can be grayscale, palette or RGBA; normalise to a 3 channel RGB array before detection
        with Image.open(image_path) as image:
            img_array = np.array(image.convert('RGB'))

        classes, scores, bboxes = model.detect(img_array, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

        # Nothing detected: covers both an empty tuple and an empty array
        if len(classes) == 0:
            continue

        # argmax over the scores also handles the single detection case
        index = select_box((classes, scores, bboxes), 'prob')

        annot = [0] # one class annotation. by default 0
        annot.extend(bnd_box_to_yolo_line(bboxes[index], img_array.shape))

        with open(text_file_name, 'w') as f:
            f.write(' '.join(str(value) for value in annot))

    with open(os.path.join(dir_path, 'classes.txt'), 'w') as f:
        f.write('Default Class')


def IoU(true_bb:[tuple,list,np.ndarray]=(0,0,0,0), pred_bb:[tuple,list,np.ndarray]=(0,0,0,0))->float:
    '''
    Compute the Intersection Over Union of two Bounding Boxes
    args:
        true_bb: Coordinates of the True Bounding Box given as (x_min, y_min, x_max, y_max)
        pred_bb: Coordinates of the Predicted Bounding Box given as (x_min, y_min, x_max, y_max)
    out:
        Intersection area divided by union area. Every side length is counted as (max - min + 1)
    '''
    t_x1, t_y1, t_x2, t_y2 = true_bb[0], true_bb[1], true_bb[2], true_bb[3]
    p_x1, p_y1, p_x2, p_y2 = pred_bb[0], pred_bb[1], pred_bb[2], pred_bb[3]

    # The overlap box starts at the larger of the two mins and ends at the smaller of the two maxes
    overlap_w = min(t_x2, p_x2) - max(t_x1, p_x1) + 1
    overlap_h = min(t_y2, p_y2) - max(t_y1, p_y1) + 1

    # A negative side means the boxes do not overlap on that axis, so clamp it to 0
    intersect_area = max(0, overlap_w) * max(0, overlap_h)

    true_area = (t_x2 - t_x1 + 1) * (t_y2 - t_y1 + 1)
    pred_area = (p_x2 - p_x1 + 1) * (p_y2 - p_y1 + 1)

    # The overlap is part of both areas, so take it out once
    return intersect_area / float(true_area + pred_area - intersect_area)


def pad_resize(img:np.ndarray,width:int=224,height:int=224)->np.ndarray:
    '''
    Pad and / or Resize the Image to the given dimensions.
    A dimension that is smaller than the target is padded with the value 255, keeping the Image in the middle.
    If, after padding, a dimension is still larger than the target, the whole Image is resized to (width, height).
    args:
        img: Image array in numpy layout, i.e. shape (rows, cols) or (rows, cols, channels)
        width: Target number of columns
        height: Target number of rows
    out:
        Image array with ``height`` rows and ``width`` columns
    '''
    # numpy shape is (rows, cols, ...): rows go with height, cols with width
    rows, cols = img.shape[:2]

    pad_rows = max(height - rows, 0)
    pad_cols = max(width - cols, 0)
    if pad_rows or pad_cols:
        top, left = pad_rows // 2, pad_cols // 2
        # The spare pixel of an odd difference goes to the bottom / right so the target size is hit exactly
        pad_spec = [(top, pad_rows - top), (left, pad_cols - left)]
        # Leave any trailing axes (channels) untouched
        pad_spec += [(0, 0)] * (img.ndim - 2)
        img = np.pad(img, pad_spec, constant_values=255)
        rows, cols = img.shape[:2]

    if rows > height or cols > width:
        # cv2 takes dsize as (width, height)
        img = cv2.resize(img, dsize=(width, height), interpolation=cv2.INTER_CUBIC)

    return img
	import cv2
	from PIL import Image
	import numpy as np
	import glob

	def select_box(results:np.ndarray,method:str)->int:
	    '''
	    Pick one Bounding Box out of a detection result, by largest area or by highest score
	    args:
	        results: Detection output as a 3 item sequence (classes, scores, boxes). Each box is indexed as [x, y, w, h]
	        method: 'area' picks the box with the largest w*h. Any other value picks the box with the highest score
	    out:
	        Index (as returned by np.argmax) of the selected bounding box
	    '''
	    classes, scores, bboxes = results

	    if method == 'area':
	        return np.argmax([box[2] * box[3] for box in bboxes])

	    # Everything that is not 'area' falls back to the score based choice
	    return np.argmax(scores)


	def bnd_box_to_yolo_line(box:np.ndarray,img_size:np.ndarray)->tuple:
	    '''
	    Convert a pixel Bounding Box into the relative values used in a YOLO text line
	    args:
	        box: 1-D sequence describing the box as [x_min, y_min, w, h] in pixels
	        img_size: Image size in numpy ``shape`` order. Index 0 is used to scale the y values and index 1 to scale the x values
	    out:
	        Tuple (x_center, y_center, w, h) of floats, each divided by the matching image dimension
	    '''
	    (x_min, y_min) = (box[0], box[1])
	    (w, h) = (box[2], box[3])
	    x_max = x_min + w
	    y_max = y_min + h

	    # img_size[1] scales the horizontal axis, img_size[0] the vertical one
	    x_center = float((x_min + x_max)) / 2 / img_size[1]
	    y_center = float((y_min + y_max)) / 2 / img_size[0]

	    w = float((x_max - x_min)) / img_size[1]
	    h = float((y_max - y_min)) / img_size[0]

	    return x_center, y_center, w, h

	def yolo_to_bb(annotations:[tuple,list,np.ndarray], img_size:tuple, return_wh:bool = True):
	    '''
	    Change the YoloV4 Darknet annotations to Bounding Box Format
	    args:
	        annotations: Darknet annotation given as (xc, yc, w, h), all relative to the image size, as stored in the .txt file. Exclude the class id
	        img_size: Size of the original image in numpy ``shape`` order, i.e. (height, width[, channels])
	        return_wh: Whether to return Width, Height (True) or Max_X, Max_Y (False) as the last two values
	    out:
	        Tuple of ints describing the box as (x_min, y_min, w, h) when return_wh is True, else (x_min, y_min, x_max, y_max)
	    '''
	    (xc, yc, w, h) = annotations
	    img_h, img_w = img_size[0], img_size[1]

	    # The max edge is the min edge mirrored around the centre: max = 2*centre - min
	    x_min = (xc * img_w) - ((w * img_w) / 2)
	    x_max = (xc * img_w * 2) - x_min

	    y_min = (yc * img_h) - ((h * img_h) / 2)
	    y_max = (yc * img_h * 2) - y_min

	    if return_wh:
	        return int(x_min), int(y_min), int(x_max) - int(x_min), int(y_max) - int(y_min)

	    return int(x_min), int(y_min), int(x_max), int(y_max)



	def build_yolo_model(weight_file_path:str,config_file_path:str,size:tuple=(416,416)):
	    '''
	    Load a Yolo network from disk and wrap it in an OpenCV detection model
	    args:
	        weight_file_path: Path to the .weights (Yolo v3,v4 etc) file
	        config_file_path: Path to the .cfg file
	        size: Input size handed to the model. You can pass in a multiple of 32. Works even when you have trained with 416 and are now testing on 608
	    out:
	        cv2.dnn_DetectionModel configured with the given input size and a 1/255 pixel scale
	    '''
	    net = cv2.dnn.readNet(weight_file_path, config_file_path)
	    model = cv2.dnn_DetectionModel(net)
	    model.setInputParams(size=size, scale=1/255.)
	    return model

	def generate_text_annotation(dir_path:str,weight_file_path:str,config_file_path:str,size:tuple=(416,416),CONFIDENCE_THRESHOLD:float=0.51, NMS_THRESHOLD:float=0.51)->None:
	    '''
	    Generate one YOLO annotation .txt file per .png image in a directory, using the Bounding Box found by the model.
	    For every image with at least one detection, the single highest scoring box is written next to the image as
	    "0 x_center y_center w h" (the class id is always 0). Images without a detection get no file.
	    A classes.txt file is written into dir_path once all images are processed.
	    args:
	        dir_path: Directory where your images are stored (./dir/whatever/). Only .png files are picked up. A trailing separator is optional
	        weight_file_path: Path to the .weights (Yolo v3,v4 etc) file
	        config_file_path: Path to the .cfg file
	        size: Input size handed to the model. You can pass in a multiple of 32. Works even when you have trained with 416 and are now testing on 608
	        CONFIDENCE_THRESHOLD: Confidence threshold passed to model.detect
	        NMS_THRESHOLD: Non Maximum Suppression threshold passed to model.detect
	    '''
	    model = build_yolo_model(weight_file_path, config_file_path, size)

	    for image_path in glob.glob(os.path.join(dir_path, '*.png')):
	        # splitext strips only the final extension, so other dots in the path do not matter
	        text_file_name = os.path.splitext(image_path)[0] + '.txt'

	        # PNG files can be grayscale, palette or RGBA; normalise to a 3 channel RGB array before detection
	        with Image.open(image_path) as image:
	            img_array = np.array(image.convert('RGB'))

	        classes, scores, bboxes = model.detect(img_array, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

	        # Nothing detected: covers both an empty tuple and an empty array
	        if len(classes) == 0:
	            continue

	        # argmax over the scores also handles the single detection case
	        index = select_box((classes, scores, bboxes), 'prob')

	        annot = [0] # one class annotation. by default 0
	        annot.extend(bnd_box_to_yolo_line(bboxes[index], img_array.shape))

	        with open(text_file_name, 'w') as f:
	            f.write(' '.join(str(value) for value in annot))

	    with open(os.path.join(dir_path, 'classes.txt'), 'w') as f:
	        f.write('Default Class')


	def IoU(true_bb:[tuple,list,np.ndarray]=(0,0,0,0), pred_bb:[tuple,list,np.ndarray]=(0,0,0,0))->float:
	    '''
	    Compute the Intersection Over Union of two Bounding Boxes
	    args:
	        true_bb: Coordinates of the True Bounding Box given as (x_min, y_min, x_max, y_max)
	        pred_bb: Coordinates of the Predicted Bounding Box given as (x_min, y_min, x_max, y_max)
	    out:
	        Intersection area divided by union area. Every side length is counted as (max - min + 1)
	    '''
	    # The overlap box starts at the larger of the two mins and ends at the smaller of the two maxes
	    xA = max(true_bb[0], pred_bb[0])
	    yA = max(true_bb[1], pred_bb[1])
	    xB = min(true_bb[2], pred_bb[2])
	    yB = min(true_bb[3], pred_bb[3])

	    # Width * Height of the overlap. A negative side means no overlap on that axis, so clamp it to 0
	    intersect_area = max(0, xB - xA + 1) * max(0, yB - yA + 1)

	    true_area = (true_bb[2] - true_bb[0] + 1) * (true_bb[3] - true_bb[1] + 1)
	    pred_area = (pred_bb[2] - pred_bb[0] + 1) * (pred_bb[3] - pred_bb[1] + 1)

	    # The overlap is part of both areas, so take it out once
	    union_area = true_area + pred_area - intersect_area

	    return intersect_area / float(union_area)


	def pad_resize(img:np.ndarray,width:int=224,height:int=224)->np.ndarray:
	    '''
	    Pad and / or Resize the Image to the given dimensions.
	    A dimension that is smaller than the target is padded with the value 255, keeping the Image in the middle.
	    If, after padding, a dimension is still larger than the target, the whole Image is resized to (width, height).
	    args:
	        img: Image array in numpy layout, i.e. shape (rows, cols) or (rows, cols, channels)
	        width: Target number of columns
	        height: Target number of rows
	    out:
	        Image array with ``height`` rows and ``width`` columns
	    '''
	    # numpy shape is (rows, cols, ...): rows go with height, cols with width
	    rows, cols = img.shape[:2]

	    pad_rows = max(height - rows, 0)
	    pad_cols = max(width - cols, 0)
	    if pad_rows or pad_cols:
	        top, left = pad_rows // 2, pad_cols // 2
	        # The spare pixel of an odd difference goes to the bottom / right so the target size is hit exactly
	        pad_spec = [(top, pad_rows - top), (left, pad_cols - left)]
	        # Leave any trailing axes (channels) untouched
	        pad_spec += [(0, 0)] * (img.ndim - 2)
	        img = np.pad(img, pad_spec, constant_values=255)
	        rows, cols = img.shape[:2]

	    if rows > height or cols > width:
	        # cv2 takes dsize as (width, height)
	        img = cv2.resize(img, dsize=(width, height), interpolation=cv2.INTER_CUBIC)

	    return img