# Muhammad4hmed/ensemble_detection.py

## ensemble_detection.py
def extract_w_h(image_id_check):
    """Look up the stored dimensions of one test image.

    Reads the module-level ``test_df`` table and returns the ``dim1`` and
    ``dim0`` values of the first row whose ``id`` equals ``image_id_check``.

    Args:
        image_id_check: Image identifier to match against ``test_df.id``.

    Returns:
        A ``(w, h)`` pair taken from the ``dim1`` and ``dim0`` columns, in
        that order.

    Raises:
        IndexError: If no row of ``test_df`` has the requested id.
    """
    # NOTE(review): assumes dim1 is the width and dim0 the height -- confirm
    # against how test_df was built.
    # Filter on the argument itself; the lookup used to read a global named
    # ``image_id`` and ignored the parameter entirely.
    w, h = test_df.loc[test_df.id == image_id_check, ['dim1', 'dim0']].values[0]
    return w, h

def extract_data(data_list, w, h):
    """Parse a flat prediction token list into labels, scores and boxes.

    ``data_list`` is read in groups of six tokens:
    ``label score x_min y_min x_max y_max``. The x coordinates are divided
    by ``w`` and the y coordinates by ``h``.

    Empty-string tokens are skipped, so an empty prediction string or one
    with leading/trailing/double spaces (which ``str.split(' ')`` turns into
    ``''`` entries) no longer raises.

    Args:
        data_list: Sequence of tokens, typically ``prediction.split(' ')``.
        w: Divisor applied to ``x_min`` and ``x_max``.
        h: Divisor applied to ``y_min`` and ``y_max``.

    Returns:
        A tuple ``(labels, scores_list, boxes_list)`` where ``labels`` is a
        list of ints, ``scores_list`` a list of floats and ``boxes_list`` a
        list of ``[x_min, y_min, x_max, y_max]`` lists.

    Raises:
        ValueError: If a token cannot be converted to a number.
        IndexError: If the number of non-empty tokens is not a multiple of six.
    """
    tokens = [tok for tok in data_list if tok != '']

    labels = []
    scores_list = []
    boxes_list = []
    for start in range(0, len(tokens), 6):
        labels.append(int(tokens[start]))
        scores_list.append(float(tokens[start + 1]))
        boxes_list.append([
            float(tokens[start + 2]) / w,
            float(tokens[start + 3]) / h,
            float(tokens[start + 4]) / w,
            float(tokens[start + 5]) / h,
        ])
    return labels, scores_list, boxes_list

def scale_data(boxes, img_height, img_width):
    """Multiply box columns by the image size, in place.

    Columns 0 and 2 are multiplied by ``img_width``; columns 1 and 3 by
    ``img_height``. The array passed in is modified and also returned.

    Args:
        boxes: 2-D array indexable as ``boxes[:, k]`` for ``k`` in 0..3.
        img_height: Factor for columns 1 and 3.
        img_width: Factor for columns 0 and 2.

    Returns:
        The same ``boxes`` object after scaling.
    """
    # Column order is x, y, x, y, so the factors alternate width, height.
    column_factors = (img_width, img_height, img_width, img_height)
    for col, factor in enumerate(column_factors):
        boxes[:, col] = boxes[:, col] * factor
    return boxes

def convert_data_to_row(boxes, scores, labels):
    """Serialise detections into one space-separated prediction string.

    For every index ``i`` in ``range(len(boxes))`` six tokens are emitted:
    ``int(labels[i])``, ``scores[i]`` and the first four entries of
    ``boxes[i]``, each converted with ``str``.

    Args:
        boxes: Sequence of boxes, each indexable at positions 0..3.
        scores: Sequence of scores, indexed in step with ``boxes``.
        labels: Sequence of labels, indexed in step with ``boxes``.

    Returns:
        The tokens joined by single spaces; ``''`` when ``boxes`` is empty.
    """
    tokens = []
    for idx in range(len(boxes)):
        tokens.extend([
            str(int(labels[idx])),
            str(scores[idx]),
            str(boxes[idx][0]),
            str(boxes[idx][1]),
            str(boxes[idx][2]),
            str(boxes[idx][3]),
        ])
    return ' '.join(tokens)

# Distinct image ids present in the first prediction table; the fusion loop
# below iterates over them.
image_id_lst = yolo_pred_df['id'].unique()
image_id_lst

# Per-model weights handed to weighted_boxes_fusion. fusion_boxes supplies the
# models in the order yolo_pred_df, mmdet_pred_df, pred_df_old.
weights = [1, 1, 2]

# iou_thr and skip_box_thr are forwarded to weighted_boxes_fusion by
# fusion_boxes.
iou_thr = 0.5
skip_box_thr = 0.03
# NOTE(review): sigma is not referenced by the code visible in this file.
sigma = 0.1
def fusion_boxes(image_id):
    """Fuse the detections of three prediction tables for one image.

    The prediction strings for ``image_id`` are read from the module-level
    tables ``yolo_pred_df``, ``mmdet_pred_df`` and ``pred_df_old``, parsed
    with ``extract_data`` (which divides coordinates by the image size from
    ``extract_w_h``), merged with ``weighted_boxes_fusion`` using the
    module-level ``weights``, ``iou_thr`` and ``skip_box_thr``, scaled back
    with ``scale_data`` and cast to integers.

    Args:
        image_id: Value to match against the ``id`` column of each table.

    Returns:
        A one-row ``pandas.DataFrame`` with columns ``id`` and
        ``PredictionString`` holding the fused detections.

    Raises:
        IndexError: If any of the tables has no row for ``image_id``.
    """
    w, h = extract_w_h(image_id_check=image_id)

    yolo_pred_1 = yolo_pred_df[yolo_pred_df['id'] == image_id]['PredictionString'].values[0]
    yololabels, yoloscores_list, yoloboxes_list = extract_data(yolo_pred_1.split(' '), w, h)

    # NOTE(review): the last character of the mmdet string is dropped before
    # splitting -- presumably a trailing separator; confirm against the
    # code that wrote mmdet_pred_df.
    mmdet_pred_1 = mmdet_pred_df[mmdet_pred_df['id'] == image_id]['PredictionString'].values[0][0:-1]
    mmdetlabels, mmdetscores_list, mmdetboxes_list = extract_data(mmdet_pred_1.split(' '), w, h)

    yolo_pred_2 = pred_df_old[pred_df_old['id'] == image_id]['PredictionString'].values[0]
    yololabels2, yoloscores_list2, yoloboxes_list2 = extract_data(yolo_pred_2.split(' '), w, h)

    # One entry per model; the order must match the module-level `weights`.
    final_boxes = [yoloboxes_list, mmdetboxes_list, yoloboxes_list2]
    final_labels = [yololabels, mmdetlabels, yololabels2]
    final_scores = [yoloscores_list, mmdetscores_list, yoloscores_list2]

    boxes, scores, labels = weighted_boxes_fusion(
        final_boxes,
        final_scores,
        final_labels,
        weights=weights,
        iou_thr=iou_thr,
        skip_box_thr=skip_box_thr,
    )
    # scale_data takes (boxes, img_height, img_width).
    boxes = scale_data(boxes, h, w)
    boxes = boxes.astype('int')

    # Pass the fused labels as the third argument; scores used to be passed
    # twice, which wrote int(score) in place of every class label.
    merged_data = convert_data_to_row(boxes, scores, labels)
    merged_data = pd.DataFrame([[image_id, merged_data]], columns=['id', 'PredictionString'])
    return merged_data
# Fuse every image and assemble the one-row results into a single table.
# The rows are collected in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and re-copied the whole frame on every iteration.
merged_frames = []
for image_id in tqdm(image_id_lst):
    merged_data_final = fusion_boxes(image_id)
    merged_frames.append(merged_data_final)

if merged_frames:
    merged_df_final = pd.concat(merged_frames, ignore_index=True)
else:
    # No images to fuse: keep the empty two-column frame.
    merged_df_final = pd.DataFrame(columns=['id', 'PredictionString'])

######## TO PLOT ##############

# Visual check: draw the boxes parsed from the first row of pred_df onto one
# DICOM image and display it. extract_info is not defined in this part of the
# file.
box2,scores2,labels2 = extract_info(pred_df['PredictionString'][0])
# NOTE(review): the factor 10000 presumably maps normalised coordinates to
# pixels -- confirm against the real image size.
box2 = (np.array(box2) * 10000).astype('int')
import pydicom as dicom
import matplotlib.pylab as plt

# specify your image path
image_path = '/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm'
ds = dicom.dcmread(image_path)

ds_arr = ds.pixel_array
import cv2

for box in box2:
    # One rectangle per box, colour (255,255,255), thickness 2, drawn onto ds_arr.
    cv2.rectangle(ds_arr, (box[0], box[1]), (box[2], box[3]), (255,255,255), 2)
plt.imshow(ds_arr)


########### SIIM winner ensemble

# For every image row of `grp`: blend the study-level predictions, fuse the
# detection outputs of four models with weighted boxes fusion, and append one
# '<imageid>_image' row to submission_image_output.
for _, row in grp.iterrows():
    imageid = row['imageid']

    # Study-level prediction: fixed-weight blend of four classifiers.
    pred = (
        0.3 * eb5_study_pred[imageid]
        + 0.2 * eb6_study_pred[imageid]
        + 0.2 * eb7_study_pred[imageid]
        + 0.3 * sr152_study_pred[imageid]
    )
    preds.append(pred)

    boxes1, scores1, labels1, img_width, img_height = load_dict('./det_predictions/tf_efficientdet_d7_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))

    # Every detector must report the same image size as the first one.
    boxes2, scores2, labels2, img_width2, img_height2 = load_dict('./det_predictions/yolov5x6_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
    assert img_width2 == img_width and img_height2 == img_height

    boxes3, scores3, labels3, img_width3, img_height3 = load_dict('./det_predictions/resnet200d_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
    assert img_width3 == img_width and img_height3 == img_height

    boxes4, scores4, labels4, img_width4, img_height4 = load_dict('./det_predictions/resnet101d_1024_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
    assert img_width4 == img_width and img_height4 == img_height

    boxes = boxes1 + boxes2 + boxes3 + boxes4
    labels = labels1 + labels2 + labels3 + labels4

    # Original author's note: scale the Faster R-CNN scores (models 3 and 4)
    # to the EfficientDet / YOLO score range.
    scores3 = [[x * 0.78 for x in s] for s in scores3]
    scores4 = [[x * 0.78 for x in s] for s in scores4]

    scores = scores1 + scores2 + scores3 + scores4

    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=None, iou_thr=0.6)
    # Every fused label must be class 0. np.all is True for an empty result,
    # whereas np.mean of an empty array is NaN and made the old check fail
    # whenever no box survived fusion.
    assert np.all(np.asarray(labels) == 0)
    boxes = boxes.clip(0, 1)

    # Multiply x columns by the image width and y columns by the height.
    boxes[:, [0, 2]] = boxes[:, [0, 2]] * float(img_width)
    boxes[:, [1, 3]] = boxes[:, [1, 3]] * float(img_height)

    # The 'none' entry carries the first component of the blended study
    # prediction with a fixed 0 0 1 1 box.
    neg_image_pred = 'none {} 0 0 1 1'.format(pred[0])
    opacity_image_pred = [
        'opacity {} {} {} {} {}'.format(score, box[0], box[1], box[2], box[3])
        for box, score in zip(boxes, scores)
    ]
    image_pred = ' '.join([neg_image_pred] + opacity_image_pred)
    submission_image_output.append(['{}_image'.format(imageid), image_pred])
	def extract_w_h(image_id_check):
	    """Look up the stored dimensions of one test image.

	    Reads the module-level ``test_df`` table and returns the ``dim1`` and
	    ``dim0`` values of the first row whose ``id`` equals ``image_id_check``.

	    Args:
	        image_id_check: Image identifier to match against ``test_df.id``.

	    Returns:
	        A ``(w, h)`` pair taken from the ``dim1`` and ``dim0`` columns.

	    Raises:
	        IndexError: If no row of ``test_df`` has the requested id.
	    """
	    # NOTE(review): assumes dim1 is the width and dim0 the height -- confirm.
	    # Filter on the argument itself rather than a global named ``image_id``.
	    w, h = test_df.loc[test_df.id == image_id_check, ['dim1', 'dim0']].values[0]
	    return w, h

	def extract_data(data_list, w, h):
	    """Parse a flat prediction token list into labels, scores and boxes.

	    ``data_list`` is read in groups of six tokens:
	    ``label score x_min y_min x_max y_max``. The x coordinates are divided
	    by ``w`` and the y coordinates by ``h``. Empty-string tokens (produced
	    by ``str.split(' ')`` on empty or space-terminated strings) are skipped.

	    Args:
	        data_list: Sequence of tokens, typically ``prediction.split(' ')``.
	        w: Divisor applied to ``x_min`` and ``x_max``.
	        h: Divisor applied to ``y_min`` and ``y_max``.

	    Returns:
	        A tuple ``(labels, scores_list, boxes_list)``.

	    Raises:
	        ValueError: If a token cannot be converted to a number.
	        IndexError: If the number of non-empty tokens is not a multiple of six.
	    """
	    tokens = [tok for tok in data_list if tok != '']

	    labels = []
	    scores_list = []
	    boxes_list = []
	    for start in range(0, len(tokens), 6):
	        labels.append(int(tokens[start]))
	        scores_list.append(float(tokens[start + 1]))
	        boxes_list.append([
	            float(tokens[start + 2]) / w,
	            float(tokens[start + 3]) / h,
	            float(tokens[start + 4]) / w,
	            float(tokens[start + 5]) / h,
	        ])
	    return labels, scores_list, boxes_list

	def scale_data(boxes, img_height, img_width):
	    """Multiply box columns by the image size, in place.

	    Columns 0 and 2 are multiplied by ``img_width``; columns 1 and 3 by
	    ``img_height``. The array passed in is modified and also returned.

	    Args:
	        boxes: 2-D array indexable as ``boxes[:, k]`` for ``k`` in 0..3.
	        img_height: Factor for columns 1 and 3.
	        img_width: Factor for columns 0 and 2.

	    Returns:
	        The same ``boxes`` object after scaling.
	    """
	    boxes[:, 0] = boxes[:, 0] * img_width
	    boxes[:, 1] = boxes[:, 1] * img_height
	    boxes[:, 2] = boxes[:, 2] * img_width
	    boxes[:, 3] = boxes[:, 3] * img_height
	    return boxes

	def convert_data_to_row(boxes, scores, labels):
	    """Serialise detections into one space-separated prediction string.

	    For every index ``i`` in ``range(len(boxes))`` six tokens are emitted:
	    ``int(labels[i])``, ``scores[i]`` and the first four entries of
	    ``boxes[i]``, each converted with ``str``.

	    Args:
	        boxes: Sequence of boxes, each indexable at positions 0..3.
	        scores: Sequence of scores, indexed in step with ``boxes``.
	        labels: Sequence of labels, indexed in step with ``boxes``.

	    Returns:
	        The tokens joined by single spaces; ``''`` when ``boxes`` is empty.
	    """
	    data_lst = []
	    for i in range(len(boxes)):
	        data_lst.append(str(int(labels[i])))
	        data_lst.append(str(scores[i]))
	        data_lst.append(str(boxes[i][0]))
	        data_lst.append(str(boxes[i][1]))
	        data_lst.append(str(boxes[i][2]))
	        data_lst.append(str(boxes[i][3]))
	    data = ' '.join(data_lst)
	    return data

	# Distinct image ids present in the first prediction table.
	image_id_lst = yolo_pred_df['id'].unique()
	image_id_lst

	# Per-model weights handed to weighted_boxes_fusion by fusion_boxes.
	weights = [1, 1, 2]

	# iou_thr and skip_box_thr are forwarded to weighted_boxes_fusion.
	iou_thr = 0.5
	skip_box_thr = 0.03
	# NOTE(review): sigma is not referenced by the code visible in this file.
	sigma = 0.1
	def fusion_boxes(image_id):
	    """Fuse the detections of three prediction tables for one image.

	    The prediction strings for ``image_id`` are read from the module-level
	    tables ``yolo_pred_df``, ``mmdet_pred_df`` and ``pred_df_old``, parsed
	    with ``extract_data``, merged with ``weighted_boxes_fusion`` using the
	    module-level ``weights``, ``iou_thr`` and ``skip_box_thr``, scaled back
	    with ``scale_data`` and cast to integers.

	    Args:
	        image_id: Value to match against the ``id`` column of each table.

	    Returns:
	        A one-row ``pandas.DataFrame`` with columns ``id`` and
	        ``PredictionString`` holding the fused detections.

	    Raises:
	        IndexError: If any of the tables has no row for ``image_id``.
	    """
	    w, h = extract_w_h(image_id_check=image_id)

	    yolo_pred_1 = yolo_pred_df[yolo_pred_df['id'] == image_id]['PredictionString'].values[0]
	    yololabels, yoloscores_list, yoloboxes_list = extract_data(yolo_pred_1.split(' '), w, h)

	    # NOTE(review): the last character of the mmdet string is dropped before
	    # splitting -- presumably a trailing separator; confirm.
	    mmdet_pred_1 = mmdet_pred_df[mmdet_pred_df['id'] == image_id]['PredictionString'].values[0][0:-1]
	    mmdetlabels, mmdetscores_list, mmdetboxes_list = extract_data(mmdet_pred_1.split(' '), w, h)

	    yolo_pred_2 = pred_df_old[pred_df_old['id'] == image_id]['PredictionString'].values[0]
	    yololabels2, yoloscores_list2, yoloboxes_list2 = extract_data(yolo_pred_2.split(' '), w, h)

	    # One entry per model; the order must match the module-level `weights`.
	    final_boxes = [yoloboxes_list, mmdetboxes_list, yoloboxes_list2]
	    final_labels = [yololabels, mmdetlabels, yololabels2]
	    final_scores = [yoloscores_list, mmdetscores_list, yoloscores_list2]

	    boxes, scores, labels = weighted_boxes_fusion(
	        final_boxes,
	        final_scores,
	        final_labels,
	        weights=weights,
	        iou_thr=iou_thr,
	        skip_box_thr=skip_box_thr,
	    )
	    # scale_data takes (boxes, img_height, img_width).
	    boxes = scale_data(boxes, h, w)
	    boxes = boxes.astype('int')

	    # Pass the fused labels as the third argument; scores used to be passed
	    # twice, which wrote int(score) in place of every class label.
	    merged_data = convert_data_to_row(boxes, scores, labels)
	    merged_data = pd.DataFrame([[image_id, merged_data]], columns=['id', 'PredictionString'])
	    return merged_data
	# Fuse every image and assemble the one-row results into a single table.
	# The rows are collected in a list and concatenated once: DataFrame.append
	# was removed in pandas 2.0 and re-copied the whole frame on every iteration.
	merged_frames = []
	for image_id in tqdm(image_id_lst):
	    merged_data_final = fusion_boxes(image_id)
	    merged_frames.append(merged_data_final)

	if merged_frames:
	    merged_df_final = pd.concat(merged_frames, ignore_index=True)
	else:
	    # No images to fuse: keep the empty two-column frame.
	    merged_df_final = pd.DataFrame(columns=['id', 'PredictionString'])

	######## TO PLOT ##############

	# Visual check: draw the boxes parsed from the first row of pred_df onto one
	# DICOM image and display it. extract_info is not defined in this part of
	# the file.
	box2,scores2,labels2 = extract_info(pred_df['PredictionString'][0])
	# NOTE(review): the factor 10000 presumably maps normalised coordinates to
	# pixels -- confirm against the real image size.
	box2 = (np.array(box2) * 10000).astype('int')
	import pydicom as dicom
	import matplotlib.pylab as plt

	# specify your image path
	image_path = '/kaggle/input/siim-covid19-detection/train/00086460a852/9e8302230c91/65761e66de9f.dcm'
	ds = dicom.dcmread(image_path)

	ds_arr = ds.pixel_array
	import cv2

	# The loop body had lost its indentation; only the rectangle call belongs
	# to the loop, the image is shown once afterwards.
	for box in box2:
	    cv2.rectangle(ds_arr, (box[0], box[1]), (box[2], box[3]), (255,255,255), 2)
	plt.imshow(ds_arr)







	########### SIIM winner ensemble

	# For every image row of `grp`: blend the study-level predictions, fuse the
	# detection outputs of four models with weighted boxes fusion, and append
	# one '<imageid>_image' row to submission_image_output.
	for _, row in grp.iterrows():
	    imageid = row['imageid']

	    # Study-level prediction: fixed-weight blend of four classifiers.
	    pred = (
	        0.3 * eb5_study_pred[imageid]
	        + 0.2 * eb6_study_pred[imageid]
	        + 0.2 * eb7_study_pred[imageid]
	        + 0.3 * sr152_study_pred[imageid]
	    )
	    preds.append(pred)

	    boxes1, scores1, labels1, img_width, img_height = load_dict('./det_predictions/tf_efficientdet_d7_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))

	    # Every detector must report the same image size as the first one.
	    boxes2, scores2, labels2, img_width2, img_height2 = load_dict('./det_predictions/yolov5x6_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
	    assert img_width2 == img_width and img_height2 == img_height

	    boxes3, scores3, labels3, img_width3, img_height3 = load_dict('./det_predictions/resnet200d_768_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
	    assert img_width3 == img_width and img_height3 == img_height

	    boxes4, scores4, labels4, img_width4, img_height4 = load_dict('./det_predictions/resnet101d_1024_fold0_1_2_3_4_test_pred/{}.pkl'.format(imageid))
	    assert img_width4 == img_width and img_height4 == img_height

	    boxes = boxes1 + boxes2 + boxes3 + boxes4
	    labels = labels1 + labels2 + labels3 + labels4

	    # Original author's note: scale the Faster R-CNN scores (models 3 and 4)
	    # to the EfficientDet / YOLO score range.
	    scores3 = [[x * 0.78 for x in s] for s in scores3]
	    scores4 = [[x * 0.78 for x in s] for s in scores4]

	    scores = scores1 + scores2 + scores3 + scores4

	    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=None, iou_thr=0.6)
	    # Every fused label must be class 0. np.all is True for an empty result,
	    # whereas np.mean of an empty array is NaN and made the old check fail
	    # whenever no box survived fusion.
	    assert np.all(np.asarray(labels) == 0)
	    boxes = boxes.clip(0, 1)

	    # Multiply x columns by the image width and y columns by the height.
	    boxes[:, [0, 2]] = boxes[:, [0, 2]] * float(img_width)
	    boxes[:, [1, 3]] = boxes[:, [1, 3]] * float(img_height)

	    # The 'none' entry carries the first component of the blended study
	    # prediction with a fixed 0 0 1 1 box.
	    neg_image_pred = 'none {} 0 0 1 1'.format(pred[0])
	    opacity_image_pred = [
	        'opacity {} {} {} {} {}'.format(score, box[0], box[1], box[2], box[3])
	        for box, score in zip(boxes, scores)
	    ]
	    image_pred = ' '.join([neg_image_pred] + opacity_image_pred)
	    submission_image_output.append(['{}_image'.format(imageid), image_pred])