@harshraj22
Created September 24, 2021 09:17
Bottom up attention: Feature extraction
# import torch
# import detectron2
# from PIL import Image
# import numpy as np
# from detectron2.modeling import build_model
# from detectron2.config import get_cfg
# from detectron2.structures import ImageList
# from torchinfo import summary
import warnings
warnings.filterwarnings('ignore')
# cfg_files = {
#     'single_output': ['/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml',
#                       '/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml']
# }
# cfg = get_cfg() # obtain detectron2's default config
# cfg.merge_from_file('/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml')
# model = build_model(cfg) # returns a torch.nn.Module
# model.eval()
# img = Image.open('/home/prabhu/test/610bc917766a8-Largest_Zoos_In_India.jpeg')
# img = np.array(img)
# img = np.moveaxis(img, -1, 0)
# img_tensor = torch.from_numpy(img).float().cuda()
# images = torch.randn(1, 3, 640, 640).cuda()
# features = model.backbone(images)
# images = ImageList(images, [(640, 640)])
# proposals, _ = model.proposal_generator(images, features)
# instances, _ = model.roi_heads(images, features, proposals)
# mask_features = [features[f] for f in model.roi_heads.in_features]
# mask_features = model.roi_heads.mask_pooler(mask_features, [x.pred_boxes for x in instances])
# # outs = model({'image': torch.randn(3, 640, 640).cuda()})
# # summary(model, input_data=torch.randn(3, 640, 640).cuda())
# # for name, child in model.backbone.named_children():
# #     print(name)
# print('\nContents of output are: ')
# print(type(mask_features))
# # for key, val in outs.items():
# #     print(f'{key}: {type(val)}')
# =============================================================================
# following tutorial: https://github.com/airsplay/py-bottom-up-attention/blob/master/demo/demo_feature_extraction.ipynb
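# (The commented-out block above is an earlier experiment that probed
# detectron2's roi_heads/mask_pooler directly; the working extraction
# pipeline, adapted from the tutorial, follows.)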
import os
import io
from PIL import Image
import detectron2
# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.modeling.box_regression import Box2BoxTransform
# import some common libraries
import numpy as np
# import cv2
import torch
def showarray(a, fmt='jpeg'):
    # Clamp to the valid pixel range and write to disk (fmt is currently unused).
    a = np.uint8(np.clip(a, 0, 255))
    # f = io.BytesIO()
    Image.fromarray(a).save('out.jpeg')
    # display(Image(data=f.getvalue()))
cfg = get_cfg()
cfg.merge_from_file('/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml')  # different cfg from the tutorial's; performs best with the VG weights below
# cfg.merge_from_file('/home/prabhu/detectron2/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml')
# cfg.merge_from_file("/home/prabhu/detectron2/configs/VG-Detection/faster_rcnn_R_101_C4_caffe.yaml", allow_unsafe=True) # original
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300
cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.6
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2
# VG Weight
cfg.MODEL.WEIGHTS = "http://nlp.cs.unc.edu/models/faster_rcnn_from_caffe.pkl"
predictor = DefaultPredictor(cfg)
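# DefaultPredictor builds the model from cfg, downloads and loads the weights
# above, moves the model to cfg.MODEL.DEVICE (cuda by default), and sets eval mode.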
NUM_OBJECTS = 30  # max number of region features kept per image (the tutorial uses 36)
from detectron2.modeling.postprocessing import detector_postprocess
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputLayers, FastRCNNOutputs, fast_rcnn_inference_single_image
img_file_path = '/home/prabhu/test/610bc917766a8-Largest_Zoos_In_India.jpeg'
# img_file_path = '/home/prabhu/test/3180-Pug_green_grass-732x549-thumbnail-732x549.jpg'
# img_file_path = '/home/prabhu/textvqa/5566811_bc00d504a6_o (5).jpg'
# Note: PIL loads images as RGB while detectron2's default INPUT.FORMAT is
# BGR, so the colour channels here differ from the tutorial's cv2 pipeline.
raw_img = np.array(Image.open(img_file_path))
def doit(raw_image):
    with torch.no_grad():
        raw_height, raw_width = raw_image.shape[:2]
        print("Original image size: ", (raw_height, raw_width))

        # Preprocessing
        image = predictor.aug.get_transform(raw_image).apply_image(raw_image)
        print("Transformed image size: ", image.shape[:2])
        image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
        inputs = [{"image": image, "height": raw_height, "width": raw_width}]
        images = predictor.model.preprocess_image(inputs)
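        # preprocess_image normalizes with the model's pixel mean/std and pads
        # the batch into an ImageList whose .tensor is (N, C, H, W).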
        # Run Backbone Res1-Res4
        features = predictor.model.backbone(images.tensor)

        # Generate proposals with RPN
        proposals, _ = predictor.model.proposal_generator(images, features, None)
        proposal = proposals[0]
        print('Proposal Boxes size:', proposal.proposal_boxes.tensor.shape)
        # print(proposal.proposal_boxes.tensor)
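        # With cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 300 (set above), each
        # Instances here carries up to 300 proposal_boxes plus objectness_logits.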
        # Run RoI head for each proposal (RoI Pooling + Res5)
        proposal_boxes = [x.proposal_boxes for x in proposals]
        features = [features[f] for f in predictor.model.roi_heads.in_features]
        box_features = predictor.model.roi_heads._shared_roi_transform(
            features, proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])  # pooled to 1x1
        print('Pooled features size:', feature_pooled.shape)
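        # For the R101-C4 head, _shared_roi_transform runs RoI pooling plus the
        # Res5 block, so box_features is (num_proposals, 2048, 7, 7); the mean
        # over dims [2, 3] is global average pooling, one 2048-d vector per box.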
        # print('Proposals: ', proposals)

        # Predict classes and boxes for each proposal.
        pred_class_logits, pred_proposal_deltas = predictor.model.roi_heads.box_predictor(feature_pooled)
        # print(predictor.model.roi_heads.pooler); print(cfg.MODEL)
        # print(help(FastRCNNOutputs))
        outputs = FastRCNNOutputs(
            # predictor.model.roi_heads.box_predictor.box2box_transform,
            Box2BoxTransform(weights=(1, 1, 1, 1)),  # note: the cfg default BBOX_REG_WEIGHTS is (10, 10, 5, 5)
            pred_class_logits,
            pred_proposal_deltas,
            proposals,
            # predictor.model.roi_heads.box_predictor.smooth_l1_beta,  # omitted; defaults to 0.0
        )
        probs = outputs.predict_probs()[0]
        boxes = outputs.predict_boxes()[0]
        print(f'Probs: {probs.shape}, boxes: {boxes.shape}')
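        # probs is (num_proposals, num_classes + 1) softmax scores (the last
        # column is background); boxes is (num_proposals, num_classes * 4)
        # class-wise regressed boxes decoded by the Box2BoxTransform above.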
        # print(probs)

        # Note: BUTD uses raw RoI predictions;
        # we use the predicted boxes instead.
        # boxes = proposal_boxes[0].tensor

        # NMS
        for nms_thresh in np.arange(0.5, 1.0, 0.1):
            instances, ids = fast_rcnn_inference_single_image(
                boxes, probs, image.shape[1:],
                score_thresh=0.02, nms_thresh=nms_thresh, topk_per_image=NUM_OBJECTS
            )
            if len(ids) == NUM_OBJECTS:
                break
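        # The loop relaxes the NMS threshold until exactly NUM_OBJECTS boxes
        # survive (if even 0.9 cannot, the most permissive result is kept),
        # mimicking BUTD's fixed number of region features per image.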
        print(f'After Non-Max Suppression, num of ids: {len(ids)} and num of instances: {len(instances)}')
        instances = detector_postprocess(instances, raw_height, raw_width)
        roi_features = feature_pooled[ids].detach()
        # print(instances)
        return instances, roi_features
instances, features = doit(raw_img)
print('\nShape of features:', features.shape)
print('Pred boxes shape: ', instances.pred_boxes.tensor.shape)
# print(instances.scores)
# print(instances.pred_boxes)
pred = instances.to('cpu')
# Note: "vg" metadata (Visual Genome class names) is never registered in this
# script, so the visualizer falls back to bare class ids for labels.
v = Visualizer(raw_img, MetadataCatalog.get("vg"), scale=1.2)
v = v.draw_instance_predictions(pred)
showarray(v.get_image()[:, :, ::-1])
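
# Not in the original gist: a minimal sketch of persisting the extracted
# features and boxes for a downstream VQA/captioning model. The file name
# 'butd_features.pt' is an arbitrary choice.
torch.save(
    {
        'features': features.cpu(),                  # (num_boxes, 2048) pooled RoI features
        'boxes': instances.pred_boxes.tensor.cpu(),  # (num_boxes, 4) in raw-image coordinates
        'scores': instances.scores.cpu(),            # per-box confidence
        'classes': instances.pred_classes.cpu(),     # predicted class ids
    },
    'butd_features.pt',
)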