Last active
March 23, 2024 16:36
-
-
Save etienne87/b79c6b4aa0ceb2cff554c32a7079fa5a to your computer and use it in GitHub Desktop.
Histogram-of-Oriented Gradient in Pytorch in 10 minutes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
import math | |
import time | |
def timeit(x, func, iter=10): | |
torch.cuda.synchronize() | |
start = time.time() | |
for _ in range(iter): | |
y = func(x) | |
torch.cuda.synchronize() | |
runtime = (time.time()-start)/iter | |
return runtime | |
class HOGLayer(nn.Module): | |
def __init__(self, nbins=10, pool=8, max_angle=math.pi, stride=1, padding=1, dilation=1): | |
super(HOGLayer, self).__init__() | |
self.nbins = nbins | |
self.stride = stride | |
self.padding = padding | |
self.dilation = dilation | |
self.pool = pool | |
self.max_angle = max_angle | |
mat = torch.FloatTensor([[1, 0, -1], | |
[2, 0, -2], | |
[1, 0, -1]]) | |
mat = torch.cat((mat[None], mat.t()[None]), dim=0) | |
self.register_buffer("weight", mat[:,None,:,:]) | |
self.pooler = nn.AvgPool2d(pool, stride=pool, padding=0, ceil_mode=False, count_include_pad=True) | |
def forward(self, x): | |
with torch.no_grad(): | |
gxy = F.conv2d(x, self.weight, None, self.stride, | |
self.padding, self.dilation, 1) | |
#2. Mag/ Phase | |
mag = gxy.norm(dim=1) | |
norm = mag[:,None,:,:] | |
phase = torch.atan2(gxy[:,0,:,:], gxy[:,1,:,:]) | |
#3. Binning Mag with linear interpolation | |
phase_int = phase / self.max_angle * self.nbins | |
phase_int = phase_int[:,None,:,:] | |
n, c, h, w = gxy.shape | |
out = torch.zeros((n, self.nbins, h, w), dtype=torch.float, device=gxy.device) | |
out.scatter_(1, phase_int.floor().long()%self.nbins, norm) | |
out.scatter_add_(1, phase_int.ceil().long()%self.nbins, 1 - norm) | |
return self.pooler(out) | |
class HOGLayerMoreComplicated(nn.Module): | |
def __init__(self, nbins=10, pool=8, max_angle=math.pi, stride=1, padding=1, dilation=1, | |
mean_in=False, max_out=False): | |
super(HOGLayerMoreComplicated, self).__init__() | |
self.nbins = nbins | |
self.stride = stride | |
self.padding = padding | |
self.dilation = dilation | |
self.pool = pool | |
self.max_angle = max_angle | |
self.max_out = max_out | |
self.mean_in = mean_in | |
mat = torch.FloatTensor([[1, 0, -1], | |
[2, 0, -2], | |
[1, 0, -1]]) | |
mat = torch.cat((mat[None], mat.t()[None]), dim=0) | |
self.register_buffer("weight", mat[:,None,:,:]) | |
self.pooler = nn.AvgPool2d(pool, stride=pool, padding=0, ceil_mode=False, count_include_pad=True) | |
def forward(self, x): | |
if self.mean_in: | |
return self.forward_v1(x) | |
else: | |
return self.forward_v2(x) | |
def forward_v1(self, x): | |
if x.size(1) > 1: | |
x = x.mean(dim=1)[:,None,:,:] | |
gxy = F.conv2d(x, self.weight, None, self.stride, | |
self.padding, self.dilation, 1) | |
# 2. Mag/ Phase | |
mag = gxy.norm(dim=1) | |
norm = mag[:, None, :, :] | |
phase = torch.atan2(gxy[:, 0, :, :], gxy[:, 1, :, :]) | |
# 3. Binning Mag with linear interpolation | |
phase_int = phase / self.max_angle * self.nbins | |
phase_int = phase_int[:, None, :, :] | |
n, c, h, w = gxy.shape | |
out = torch.zeros((n, self.nbins, h, w), dtype=torch.float, device=gxy.device) | |
out.scatter_(1, phase_int.floor().long() % self.nbins, norm) | |
out.scatter_add_(1, phase_int.ceil().long() % self.nbins, 1 - norm) | |
return self.pooler(out) | |
def forward_v2(self, x): | |
batch_size, in_channels, height, width = x.shape | |
weight = self.weight.repeat(3, 1, 1, 1) | |
gxy = F.conv2d(x, weight, None, self.stride, | |
self.padding, self.dilation, groups=in_channels) | |
gxy = gxy.view(batch_size, in_channels, 2, height, width) | |
if self.max_out: | |
gxy = gxy.max(dim=1)[0][:,None,:,:,:] | |
#2. Mag/ Phase | |
mags = gxy.norm(dim=2) | |
norms = mags[:,:,None,:,:] | |
phases = torch.atan2(gxy[:,:,0,:,:], gxy[:,:,1,:,:]) | |
#3. Binning Mag with linear interpolation | |
phases_int = phases / self.max_angle * self.nbins | |
phases_int = phases_int[:,:,None,:,:] | |
out = torch.zeros((batch_size, in_channels, self.nbins, height, width), | |
dtype=torch.float, device=gxy.device) | |
out.scatter_(2, phases_int.floor().long()%self.nbins, norms) | |
out.scatter_add_(2, phases_int.ceil().long()%self.nbins, 1 - norms) | |
out = out.view(batch_size, in_channels * self.nbins, height, width) | |
out = torch.cat((self.pooler(out), self.pooler(x)), dim=1) | |
return out | |
if __name__ == '__main__': | |
import cv2 | |
cuda = True | |
path = '/home/etienneperot/Pictures/pencils.jpg' | |
im = cv2.imread(path, cv2.IMREAD_GRAYSCALE) | |
x = torch.from_numpy(im)[None, None] | |
if cuda: | |
x = x.cuda().float() | |
else: | |
x = x.float() | |
hog = HOGLayer(nbins=12, pool=2) | |
if cuda: | |
hog = hog.cuda() | |
y = hog(x) | |
y2 = y.cpu().numpy() | |
bin = 0 | |
while 1: | |
im = y2[0, bin] | |
im = (im-im.min())/(im.max()-im.min()) | |
print('bin: ', bin, ' ang: ', float(bin)/hog.nbins * 180.0) | |
cv2.imshow('bin', im) | |
cv2.waitKey() | |
bin = (bin + 1) % hog.nbins |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment