import torch
import torch.nn as nn

class Mlp(nn.Module):
    """Feed-forward (MLP) block used inside a transformer encoder layer."""
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)   # expand to the hidden width
        self.act = act_layer()                               # GELU by default
        self.fc2 = nn.Linear(hidden_features, out_features)  # project back down
        self.drop = nn.Dropout(drop)                         # applied after each linear layer

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
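
As a quick sanity check, here is a minimal sketch exercising Mlp on its own. The 3072 hidden size mirrors the usual 4x expansion in ViT-Base, and the token count 197 is an illustrative assumption, not something defined in this gist:

# Hypothetical smoke test for Mlp; sizes here are assumptions for illustration.
mlp = Mlp(in_features=768, hidden_features=3072)
tokens = torch.randn(1, 197, 768)  # (batch, tokens, embed_dim)
out = mlp(tokens)
print(out.shape)  # torch.Size([1, 197, 768]) -- Mlp preserves the input shape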
embed_dim = 768
num_heads = 8
block = Block(embed_dim, num_heads)  # Block (attention + Mlp) is defined earlier in this series

batch_size = 1
class_token = nn.Parameter(torch.zeros(1, 1, embed_dim))  # learnable [CLS] token
class_tokens = class_token.expand(batch_size, -1, -1)     # broadcast across the batch
pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))  # +1 slot for [CLS]

# num_patches and patch_output come from the patch-embedding step shown earlier
x = torch.cat((class_tokens, patch_output), dim=1)  # prepend [CLS] to the patch embeddings
x = x + pos_embed                                   # add positional embeddings
block(x)
tensor([[[-0.0246, 0.2249, 0.1030, ..., 0.0197, 0.2219, -0.1692],
[ 0.0127, -0.0497, 0.5471, ..., 0.2520, 0.2828, 0.0150],
[-0.2090, 0.0911, 0.2493, ..., 0.4223, 0.2937, 0.0398],
...,
[ 0.0108, 0.0676, 0.4966, ..., 0.2874, 0.3851, -0.0341],
[ 0.0896, -0.0787, 0.4181, ..., 0.3768, 0.3757, 0.0293],
[ 0.0546, 0.0605, 0.2335, ..., 0.3212, 0.1791, 0.1017]]],
grad_fn=<AddBackward0>)
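
The block preserves the sequence shape, (batch_size, num_patches + 1, embed_dim). A quick check of that invariant (hedged: num_patches and patch_output come from the earlier patch-embedding step, so the exact token count depends on that code):

# Sketch of a shape assertion; relies on num_patches/patch_output from earlier.
out = block(x)
assert out.shape == (batch_size, num_patches + 1, embed_dim)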