import glob
import math
import os
import random
from sys import platform

import cv2
import numpy as np
import torch

# from torch.utils.data import Dataset
from utils.utils import xyxy2xywh
class load_images():  # for inference
    def __init__(self, path, batch_size=1, img_size=416):
        if os.path.isdir(path):
            image_format = ['.jpg', '.jpeg', '.png', '.tif']
            self.files = sorted(glob.glob('%s/*.*' % path))
            self.files = list(filter(lambda x: os.path.splitext(x)[1].lower() in image_format, self.files))
        elif os.path.isfile(path):
            self.files = [path]

        self.nF = len(self.files)  # number of image files
        self.nB = math.ceil(self.nF / batch_size)  # number of batches
        self.batch_size = batch_size
        self.height = img_size

        assert self.nF > 0, 'No images found in path %s' % path

        # RGB normalization values
        # self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((3, 1, 1))
        # self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((3, 1, 1))

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if self.count == self.nB:
            raise StopIteration
        img_path = self.files[self.count]

        # Read image
        img = cv2.imread(img_path)  # BGR

        # Padded resize
        img, _, _, _ = resize_square(img, height=self.height, color=(127.5, 127.5, 127.5))

        # Normalize RGB
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
        img = np.ascontiguousarray(img, dtype=np.float32)
        # img -= self.rgb_mean
        # img /= self.rgb_std
        img /= 255.0

        return [img_path], img

    def __len__(self):
        return self.nB  # number of batches
class load_images_v2():  # reads image paths from a list file and yields padded, normalized image batches
    def __init__(self, path, batch_size=1, img_size=416, augment=False):
        self.path = path
        with open(path, 'r') as file:
            self.img_files = file.readlines()
            self.img_files = [path.replace('\n', '') for path in self.img_files]

        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt')
                            for path in self.img_files]

        self.nF = len(self.img_files)  # number of image files
        self.nB = math.ceil(self.nF / batch_size)  # number of batches
        self.batch_size = batch_size
        self.height = img_size
        self.augment = augment

        assert self.nB > 0, 'No images found in path %s' % path

        # RGB normalization values
        # self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((1, 3, 1, 1))
        # self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((1, 3, 1, 1))

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)
        height = self.height

        img_all = []
        img_path_all = []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[files_index]
            # print(img_path)

            img = cv2.imread(img_path)  # BGR
            if img is None:
                continue

            h, w, _ = img.shape
            img, ratio, padw, padh = resize_square(img, height=height, color=(127.5, 127.5, 127.5))

            img_all.append(img)
            img_path_all.append(img_path)

        # print(img_all)
        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        # img_all -= self.rgb_mean
        # img_all /= self.rgb_std
        img_all /= 255.0

        return img_path_all, torch.from_numpy(img_all)

    def __len__(self):
        return self.nB  # number of batches
class load_images_and_labels():  # for training
    def __init__(self, path, batch_size=1, img_size=608, multi_scale=False, augment=False):
        self.path = path
        # self.img_files = sorted(glob.glob('%s/*.*' % path))
        with open(path, 'r') as file:
            self.img_files = file.readlines()
            self.img_files = [path.replace('\n', '') for path in self.img_files]

        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt')
                            for path in self.img_files]

        self.nF = len(self.img_files)  # number of image files
        self.nB = math.ceil(self.nF / batch_size)  # number of batches
        self.batch_size = batch_size
        self.height = img_size
        self.multi_scale = multi_scale
        self.augment = augment

        assert self.nB > 0, 'No images found in path %s' % path

        # RGB normalization values
        # self.rgb_mean = np.array([60.134, 49.697, 40.746], dtype=np.float32).reshape((1, 3, 1, 1))
        # self.rgb_std = np.array([29.99, 24.498, 22.046], dtype=np.float32).reshape((1, 3, 1, 1))

    def __iter__(self):
        self.count = -1
        self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
        return self

    def __next__(self):
        self.count += 1
        if self.count == self.nB:
            raise StopIteration

        ia = self.count * self.batch_size
        ib = min((self.count + 1) * self.batch_size, self.nF)

        if self.multi_scale:
            # Multi-Scale YOLO Training
            height = random.choice(range(10, 20)) * 32  # 320 - 608 pixels
        else:
            # Fixed-Scale YOLO Training
            height = self.height

        img_all = []
        labels_all = []
        for index, files_index in enumerate(range(ia, ib)):
            img_path = self.img_files[self.shuffled_vector[files_index]]
            label_path = self.label_files[self.shuffled_vector[files_index]]

            img = cv2.imread(img_path)  # BGR
            if img is None:
                continue

            augment_hsv = True
            if self.augment and augment_hsv:
                # SV augmentation by 50%
                fraction = 0.50
                img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                S = img_hsv[:, :, 1].astype(np.float32)
                V = img_hsv[:, :, 2].astype(np.float32)

                a = (random.random() * 2 - 1) * fraction + 1
                S *= a
                if a > 1:
                    np.clip(S, a_min=0, a_max=255, out=S)

                a = (random.random() * 2 - 1) * fraction + 1
                V *= a
                if a > 1:
                    np.clip(V, a_min=0, a_max=255, out=V)

                img_hsv[:, :, 1] = S.astype(np.uint8)
                img_hsv[:, :, 2] = V.astype(np.uint8)
                cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)

            h, w, _ = img.shape
            img, ratio, padw, padh = resize_square(img, height=height, color=(127.5, 127.5, 127.5))

            # Load labels
            if os.path.isfile(label_path):
                labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 5)

                # Normalized xywh to pixel xyxy format
                labels = labels0.copy()
                labels[:, 1] = ratio * w * (labels0[:, 1] - labels0[:, 3] / 2) + padw
                labels[:, 2] = ratio * h * (labels0[:, 2] - labels0[:, 4] / 2) + padh
                labels[:, 3] = ratio * w * (labels0[:, 1] + labels0[:, 3] / 2) + padw
                labels[:, 4] = ratio * h * (labels0[:, 2] + labels0[:, 4] / 2) + padh
            else:
                labels = np.array([])

            # Augment image and labels
            if self.augment:
                img, labels, M = random_affine(img, labels, degrees=(-5, 5), translate=(0.10, 0.10), scale=(0.90, 1.10))

            plotFlag = False
            if plotFlag:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(10, 10)) if index == 0 else None
                plt.subplot(4, 4, index + 1).imshow(img[:, :, ::-1])
                plt.plot(labels[:, [1, 3, 3, 1, 1]].T, labels[:, [2, 2, 4, 4, 2]].T, '.-')
                plt.axis('off')

            nL = len(labels)
            if nL > 0:
                # convert pixel xyxy back to xywh, normalized by the padded square size
                labels[:, 1:5] = xyxy2xywh(labels[:, 1:5].copy()) / height

            if self.augment:
                # random left-right flip
                lr_flip = True
                if lr_flip and random.random() > 0.5:
                    img = np.fliplr(img)
                    if nL > 0:
                        labels[:, 1] = 1 - labels[:, 1]

                # random up-down flip
                ud_flip = False
                if ud_flip and random.random() > 0.5:
                    img = np.flipud(img)
                    if nL > 0:
                        labels[:, 2] = 1 - labels[:, 2]

            img_all.append(img)
            labels_all.append(torch.from_numpy(labels))

        # Normalize
        img_all = np.stack(img_all)[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB and cv2 to pytorch
        img_all = np.ascontiguousarray(img_all, dtype=np.float32)
        # img_all -= self.rgb_mean
        # img_all /= self.rgb_std
        img_all /= 255.0

        return torch.from_numpy(img_all), labels_all

    def __len__(self):
        return self.nB  # number of batches
def resize_square(img, height=416, color=(0, 0, 0)):  # resize a rectangular image to a padded square
    shape = img.shape[:2]  # shape = [height, width]
    ratio = float(height) / max(shape)  # ratio = new / old
    new_shape = [round(shape[0] * ratio), round(shape[1] * ratio)]
    dw = height - new_shape[1]  # width padding
    dh = height - new_shape[0]  # height padding
    top, bottom = dh // 2, dh - (dh // 2)
    left, right = dw // 2, dw - (dw // 2)
    img = cv2.resize(img, (new_shape[1], new_shape[0]), interpolation=cv2.INTER_AREA)  # resized, no border
    return cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color), ratio, dw // 2, dh // 2
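

# Worked example for resize_square (illustrative numbers only): for a 720x1280 input
# and height=416, ratio = 416 / 1280 = 0.325 and the resized shape is 234x416, so
# dw = 0 and dh = 182; 91 rows of padding are added to the top and bottom, and the
# function returns (padded 416x416 image, 0.325, 0, 91).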
def random_affine(img, targets=None, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(127.5, 127.5, 127.5)):
    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
    # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4

    border = 0  # width of added border (optional)
    height = max(img.shape[0], img.shape[1]) + border * 2

    # Rotation and Scale
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    # a += random.choice([-180, -90, 0, 90])  # 90deg rotations added to small rotations
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border  # x translation (pixels)
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border  # y translation (pixels)

    # Shear
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # x shear (deg)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)  # y shear (deg)

    M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)  # BGR order borderValue

    # Return warped points also
    if targets is not None:
        if len(targets) > 0:
            n = targets.shape[0]
            points = targets[:, 1:5].copy()
            area0 = (points[:, 2] - points[:, 0]) * (points[:, 3] - points[:, 1])

            # warp points
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # reject warped points outside of image
            np.clip(xy, 0, height, out=xy)
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

            targets = targets[i]
            targets[:, 1:5] = xy[i]

        return imw, targets, M
    else:
        return imw
def convert_tif2bmp(p='../xview/val_images_bmp'):
    files = sorted(glob.glob('%s/*.tif' % p))
    for i, f in enumerate(files):
        print('%g/%g' % (i + 1, len(files)))
        cv2.imwrite(f.replace('.tif', '.bmp'), cv2.imread(f))
        os.system('rm -rf ' + f)  # delete the original .tif (Unix shell only)
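

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original loaders above. Assumes
    # 'data/train.txt' (a placeholder path) lists one image path per line, with
    # matching label .txt files under a parallel 'labels' directory, as
    # load_images_and_labels expects.
    dataloader = load_images_and_labels('data/train.txt', batch_size=8, img_size=416,
                                        multi_scale=False, augment=True)
    for imgs, targets in dataloader:
        # imgs: float32 tensor of shape (batch, 3, 416, 416), RGB, scaled to [0, 1]
        # targets: list of per-image tensors with rows [class, x, y, w, h], normalized
        print(imgs.shape, len(targets))
        break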