# -*- coding: utf-8 -*-
"""
Code for self-attention over image pixels.
See Self-Attention GAN (SAGAN) and Non-local Neural Networks.
@author: yl
"""
from boxx import *
ylsys.usecuda = False  # force CPU
from boxx.ylth import *  # torch helpers: exposes torch as both `th` and `torch`, plus tht, etc.
from functools import reduce
from operator import mul

eps = 1e-10
size = lambda t: reduce(mul, t.size())  # total number of elements in a tensor
def flatten(t, dim=-1):
    # Merge dimension `dim` into the one before it, e.g. (C, H, W) -> (C, H*W)
    shape = list(t.shape)
    shape[dim - 1] *= shape[dim]
    shape.pop(dim)
    return t.reshape(tuple(shape))
def v2t(v, shape=None):
    # Inverse of flatten: reshape a flat pixel vector back to a square map
    if shape is None:
        shape = v.shape[:-1] + (int(v.shape[-1] ** .5),) * 2
    return v.reshape(shape)
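
# A quick self-check of flatten/v2t on dummy tensors (an added sketch, not
# part of the original gist): flatten merges the last two dims, and v2t
# restores a square spatial map.
_d = th.arange(24.).reshape(2, 3, 4)
assert flatten(_d).shape == (2, 12)
_s = th.arange(18.).reshape(2, 3, 3)
assert th.equal(v2t(flatten(_s)), _s)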
img = sda.astronaut()  # skimage's 512x512x3 astronaut test image
arr = norma(img)[::10, ::10]  # normalize to [0, 1], downsample to 52x52
t = tht(arr).permute(2, 0, 1).float()  # to tensor, HWC -> CHW
t.requires_grad = True
v = flatten(t)  # (C, N) with N = H*W: each pixel becomes one column
cols = th.matmul(v[..., None], torch.ones(v.shape[-1])[..., None, :])  # cols[c, i, j] = v[c, i]
rows = th.matmul(torch.ones(v.shape[-1])[..., None], v[..., None, :])  # rows[c, i, j] = v[c, j]
distance = ((rows - cols) ** 2).mean(-3) ** .5  # (N, N) RMS color distance; symmetric, so only one triangle really needs computing
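
# The (C, N, N) cols/rows temporaries are memory-hungry. Assuming PyTorch
# >= 1.1, torch.cdist yields an equivalent pairwise distance directly: the
# Euclidean distance over the C channels differs from the RMS distance above
# only by a constant factor of sqrt(C).
distance_alt = th.cdist(v.t(), v.t()) / v.shape[0] ** .5
assert th.allclose(distance, distance_alt, atol=1e-4)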
expt = th.exp(-distance)  # similarity: nearby colors get larger weights
#expt = th.exp(th.exp(expt))  # sharper alternative weighting (disabled)
attention = expt / (expt.sum(-1)[..., None] + eps)  # row-normalize so each row sums to 1
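
# Up to the eps guard, this normalization is just a softmax over each row of
# the negated distance matrix; a hedged one-line equivalent:
attention_alt = th.softmax(-distance, dim=-1)
assert th.allclose(attention, attention_alt, atol=1e-6)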
newv = (attention * v[..., None, :]).sum(-1)  # each output pixel is an attention-weighted mean of all pixels
newt = v2t(newv)  # back to (C, H, W)
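
# Sanity checks (added): every attention row is a probability distribution
# over pixels, and the attended output keeps the input tensor's shape.
assert th.allclose(attention.sum(-1), th.ones(attention.shape[-1]), atol=1e-4)
assert newt.shape == t.shape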
pix = 10  # index of a query pixel in the flattened 52x52 grid
show(v2t(attention[pix]), img, None and v2t(-distance[pix]))  # `None and ...` hides the distance map; replace None with 1 to show it
pix = 25 * 52  # first pixel of row 25
show(v2t(attention[pix]), img, None and v2t(-distance[pix]))
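
# A hedged usage example: visualize the attended result next to the input
# (show already accepts torch tensors, as the calls above demonstrate).
show(newt, t)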