Skip to content

Instantly share code, notes, and snippets.

@qiayuanl
Last active March 11, 2024 04:46
Show Gist options
  • Save qiayuanl/f97b24399e5a60065bbca90eb830c7e3 to your computer and use it in GitHub Desktop.
Save qiayuanl/f97b24399e5a60065bbca90eb830c7e3 to your computer and use it in GitHub Desktop.
import cv2 as cv
class ImageCropper:
    """Interactive rectangle editor that copies regions between two images.

    Two OpenCV windows are used: "Select", where the user drags rectangles,
    and "Final", which shows the image being edited.

    * Left-button drag copies the dragged rectangle from ``candidate`` into
      the final image.
    * Right-button drag copies the same rectangle from ``background`` into
      the final image.
    * Pressing ``c`` inside :meth:`run` ends the session.
    """

    def __init__(self, background, candidate, visualization=None):
        """Store the two source images and prepare the display images.

        Args:
            background: Image the final result starts from (copied).
            candidate: Image that left-drag rectangles are copied from.
            visualization: Optional image blended (80 %) over the candidate
                (20 %) for the "Select" window. A 2-D candidate is converted
                with ``COLOR_GRAY2BGR`` first.
        """
        self.ref_point = []
        self.background = background
        self.candidate = candidate
        self.select = self.candidate.copy()
        self.final = self.background.copy()
        if visualization is not None:
            if len(self.candidate.shape) == 2:
                candidate_3channel = cv.cvtColor(candidate, cv.COLOR_GRAY2BGR)
            else:
                candidate_3channel = candidate
            self.select = cv.addWeighted(candidate_3channel, 0.2, visualization, 0.8, 0)

    def _selection_bounds(self):
        """Return ``(x1, y1, x2, y2)`` for the current drag, ordered and clamped.

        The two points in ``ref_point`` may come in any order (the user can
        drag up/left), and a coordinate may be negative; a negative slice
        bound would wrap around in NumPy, so lower bounds are clamped to 0.
        Bounds beyond the image size are truncated by slicing itself.
        """
        (xa, ya), (xb, yb) = self.ref_point[0], self.ref_point[1]
        x1, x2 = sorted((xa, xb))
        y1, y2 = sorted((ya, yb))
        return max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)

    def get_roi(self, image):
        """Return the slice of ``image`` covered by the current selection.

        Requires ``ref_point`` to hold the two ``(x, y)`` corner points.
        """
        x1, y1, x2, y2 = self._selection_bounds()
        return image[y1:y2, x1:x2]

    def click_and_crop(self, event, x, y, flags, param):
        """Mouse callback: record drag corners and apply the edit on release.

        The signature matches what ``cv.setMouseCallback`` passes; ``flags``
        and ``param`` are unused.
        """
        if event in (cv.EVENT_LBUTTONDOWN, cv.EVENT_RBUTTONDOWN):
            # A press always starts a fresh selection.
            self.ref_point = [(x, y)]
            return
        if event not in (cv.EVENT_LBUTTONUP, cv.EVENT_RBUTTONUP):
            return
        if len(self.ref_point) != 1:
            # Release without a matching press: there is no start corner.
            return

        self.ref_point.append((x, y))
        if event == cv.EVENT_LBUTTONUP:
            cv.rectangle(self.select, self.ref_point[0], self.ref_point[1], (0, 255, 0), 2)
            cv.imshow("Select", self.select)
            source = self.candidate
        else:
            source = self.background

        x1, y1, x2, y2 = self._selection_bounds()
        self.final[y1:y2, x1:x2] = self.get_roi(source)
        cv.imshow("Final", self.final)

    def setup(self):
        """Show both windows and attach the mouse callback to "Select"."""
        cv.imshow("Select", self.select)
        cv.imshow("Final", self.final)
        cv.setMouseCallback("Select", self.click_and_crop)

    def run(self):
        """Run the interactive session until ``c`` is pressed.

        Returns:
            The edited final image.
        """
        self.setup()
        while True:
            key = cv.waitKey(0) & 0xFF
            if key == ord('c'):
                break
        return self.final
import argparse
import cv2 as cv
import imutils
import numpy as np
from image_cropper import ImageCropper
def crop_image_with_roi(image, center, xy_scale):
    """Crop a fixed-size window around ``center``, shifted to stay in bounds.

    Args:
        image: Array whose first two dimensions are (height, width).
        center: ``(cx, cy)`` pixel coordinates the window is centred on.
        xy_scale: ``(sx, sy)`` window size as a fraction of the image width
            and height respectively.

    Returns:
        The slice ``image[y1:y2, x1:x2]``. The window is
        ``int(sx * width)`` by ``int(sy * height)`` pixels whenever that fits
        inside the image; near a border the window is shifted inwards rather
        than shrunk, so every crop from same-sized frames has the same size.
    """
    cx, cy = center
    scale_x, scale_y = xy_scale
    img_height, img_width = image.shape[:2]
    width = int(scale_x * img_width)
    height = int(scale_y * img_height)

    # Floor (not truncate) the top-left corner: int() rounds -0.5 up to 0,
    # which would skip the shift below and yield a window one pixel short.
    x1 = int((2 * cx - width) // 2)
    y1 = int((2 * cy - height) // 2)

    # Shift the window back inside the image; if it is larger than the
    # image the start is pinned to 0 and the end is clipped below.
    x1 = min(max(x1, 0), max(img_width - width, 0))
    y1 = min(max(y1, 0), max(img_height - height, 0))
    x2 = min(x1 + width, img_width)
    y2 = min(y1 + height, img_height)
    return image[y1:y2, x1:x2]
def get_median_frame(cap, start, end):
    """Estimate a background image as the per-pixel median of random frames.

    Roughly one frame per ten in ``[start, end)`` is sampled at random (at
    least one), read from ``cap``, and combined with ``np.median``.

    Args:
        cap: Capture object providing ``set(prop, value)`` and ``read()``.
        start: First frame index of the range to sample from.
        end: Frame index bounding the range from above.

    Returns:
        The median frame as a ``uint8`` array.

    Raises:
        ValueError: If none of the sampled frames could be read.
    """
    span = max(end - start, 0)
    # Always take at least one sample so short ranges still give a result.
    sample_count = max(int(span / 10), 1)
    # Offset by ``start`` so the samples come from the requested range, not
    # from the first ``span`` frames of the video.
    frame_ids = start + span * np.random.uniform(size=sample_count)

    median_frames = []
    for fid in frame_ids:
        cap.set(cv.CAP_PROP_POS_FRAMES, int(fid))
        ok, frame = cap.read()
        if ok and frame is not None:
            # A failed read returns no image; including it would break median.
            median_frames.append(frame)

    if not median_frames:
        raise ValueError(f"could not read any frame in range [{start}, {end}]")
    return np.median(median_frames, axis=0).astype(dtype=np.uint8)
# ---------------------------------------------------------------------------
# Command-line interface
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Process a video file with blending or concatenation.')
parser.add_argument('mode', choices=['b', 'c'],
                    help='Processing mode: "b" for blend, "c" for horizontal concatenation')
parser.add_argument('filename', help='Name of the video file to process')
parser.add_argument('step', type=float, help='Each frame processing step')
parser.add_argument('--start', type=float, default=0, help='Start time in seconds (default: 0)')
parser.add_argument('--end', type=float, default=None,
                    help='End time in seconds (default: None, till the end of the video)')
parser.add_argument('--threshold', type=int, default=20, help='Threshold for the binary mask (default: 20)')
parser.add_argument('--overlap', dest='overlap', action='store_true')
parser.add_argument('--no-overlap', dest='overlap', action='store_false')
parser.add_argument('--scale-x', type=float, default=1.0, help='X scale for cropping (default: 1.0)')
parser.add_argument('--scale-y', type=float, default=1.0, help='Y scale for cropping (default: 1.0)')
args = parser.parse_args()

if args.step <= 0:
    parser.error("step must be greater than 0")

# ---------------------------------------------------------------------------
# Load the video and work out which frames to process
# ---------------------------------------------------------------------------
cap = cv.VideoCapture(args.filename)
if not cap.isOpened():
    parser.error(f"could not open video file: {args.filename}")
fps = cap.get(cv.CAP_PROP_FPS)
frame_count = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
if fps <= 0 or frame_count <= 0:
    # Both values are used as divisors / range bounds below.
    parser.error(f"video reports {frame_count} frames at {fps} fps; cannot process it")
end = frame_count / fps
print(f"Video loaded: {frame_count} frames at {fps} fps")
if args.end is not None and end > args.end:
    end = args.end

start_frame = int(args.start * fps)
end_frame = min(int(end * fps), frame_count - 1)
# A step shorter than one frame would truncate to 0 and make range() raise.
step_frame = max(int(args.step * fps), 1)
if start_frame > end_frame:
    parser.error("start time must not be after the end of the processed range")

# Background estimate used for frame differencing.
median_frame = get_median_frame(cap, start_frame, end_frame)

# Always finish on end_frame, even when the step does not land on it.
frames_to_process = list(range(start_frame, end_frame + 1, step_frame))
if frames_to_process[-1] != end_frame:
    frames_to_process.append(end_frame)
print(f"Processing {frames_to_process} frames")

# The last frame is the base image in blend mode; concat mode starts empty.
cap.set(cv.CAP_PROP_POS_FRAMES, frames_to_process[-1])
ok, frame = cap.read()
if not ok:
    raise SystemExit(f"Could not read frame {frames_to_process[-1]} from {args.filename}")
result = None if args.mode == "c" else frame

# ---------------------------------------------------------------------------
# Per-frame processing (every selected frame except the last one)
# ---------------------------------------------------------------------------
for frame_num in frames_to_process[:-1]:
    cap.set(cv.CAP_PROP_POS_FRAMES, frame_num)
    ok, frame = cap.read()
    if not ok:
        print(f"Skipping frame {frame_num}: could not be read")
        continue

    # Foreground mask: pixels that differ from the median background.
    diff1 = cv.absdiff(frame, median_frame)
    gray1 = cv.cvtColor(diff1, cv.COLOR_BGR2GRAY)
    _, mask1 = cv.threshold(gray1, args.threshold, 255, cv.THRESH_BINARY)
    mask = mask1

    if args.mode == "c":
        visual_frame = frame.copy()
        mask = cv.morphologyEx(mask, cv.MORPH_OPEN, cv.getStructuringElement(cv.MORPH_RECT, (3, 3)))
        # Let the user pick which parts of the mask to keep.
        image_cropper = ImageCropper(np.zeros_like(mask), mask, frame)
        mask = image_cropper.run()

        cnts = cv.findContours(mask.copy(), cv.RETR_EXTERNAL, cv.CHAIN_APPROX_NONE)
        cnts = imutils.grab_contours(cnts)
        if not cnts:
            # Nothing selected: max() below would raise on an empty list.
            print(f"Skipping frame {frame_num}: no contour in the selected mask")
            continue
        c = max(cnts, key=cv.contourArea)
        M = cv.moments(c)
        if M['m00'] == 0:
            # Zero-area contour: the centroid is undefined (division by zero).
            print(f"Skipping frame {frame_num}: selected contour has zero area")
            continue
        centroid = (round(M['m10'] / M['m00']), round(M['m01'] / M['m00']))

        cropped_frame = crop_image_with_roi(frame, centroid, (args.scale_x, args.scale_y))
        cv.drawContours(visual_frame, [c], -1, (0, 255, 0), 2)
        cv.circle(visual_frame, centroid, 5, (0, 255, 0), -1)
        cv.imshow('visual_frame', visual_frame)

        if result is None:
            result = cropped_frame
        else:
            result = cv.hconcat([result, cropped_frame])
    else:
        if args.overlap:
            # Remove from the mask whatever the running result already
            # shows as foreground.
            diff2 = cv.absdiff(result, median_frame)
            gray2 = cv.cvtColor(diff2, cv.COLOR_BGR2GRAY)
            _, mask2 = cv.threshold(gray2, args.threshold, 255, cv.THRESH_BINARY)
            mask = cv.subtract(mask1, mask2)
        # NOTE(review): indentation was lost in the copy this was restored
        # from; the manual mask edit and blend are assumed to run for every
        # frame, not only with --overlap. Confirm against the original.
        image_cropper = ImageCropper(np.zeros_like(mask), mask, cv.addWeighted(frame, 0.5, result, 0.5, 0))
        mask = image_cropper.run()
        mask_inv = cv.bitwise_not(mask)
        # Blend weight of this frame against the background: position in the
        # whole video plus 0.4, capped at 1.
        index = min(frame_num / frame_count + 0.4, 1)
        result = cv.add(cv.bitwise_and(result, result, mask=mask_inv),
                        cv.addWeighted(cv.bitwise_and(median_frame, median_frame, mask=mask), 1 - index,
                                       cv.bitwise_and(frame, frame, mask=mask), index, 0))

# NOTE(review): assumed to run once after the loop (so 'visual_frame' stays
# visible during the next selection); confirm against the original.
cv.destroyAllWindows()

# ---------------------------------------------------------------------------
# Final touch-up (blend mode only) and output
# ---------------------------------------------------------------------------
if args.mode != 'c':
    # Let the user restore regions of the untouched last frame.
    cap.set(cv.CAP_PROP_POS_FRAMES, frames_to_process[-1])
    ok, final_frame = cap.read()
    if not ok:
        raise SystemExit(f"Could not read frame {frames_to_process[-1]} from {args.filename}")
    image_cropper = ImageCropper(final_frame, result)
    result = image_cropper.run()

# Release video capture
cap.release()
cv.destroyAllWindows()

if result is None:
    raise SystemExit("No frames were processed; nothing to save")

# Save the final blended image
output_path = f"{args.filename}_{start_frame / fps:.2f}_{end_frame / fps:.2f}.png"
if not cv.imwrite(output_path, result):
    raise SystemExit(f"Failed to write {output_path}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment