Feed-forward layer of a transformer: a position-wise MLP that expands the embedding dimension by a factor of four, applies a GELU non-linearity, projects back down to the embedding dimension, and applies dropout.
"""
Source: https://github.com/karpathy/nanoGPT/blob/master/model.py
"""
from torch import nn
class MLP(nn.Module):
def __init__(
self,
d,
bias=False,
dropout=0.2
):
"""
Arguments:
d: size of embedding dimension
bias: whether or not to use bias in linear layers
dropout: probability of dropout
"""
super().__init__()
self.c_fc = nn.Linear(d, 4 * d, bias=bias)
self.gelu = nn.GELU()
self.c_proj = nn.Linear(4 * d, d, bias=bias)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
x = self.c_fc(x)
x = self.gelu(x)
x = self.c_proj(x)
x = self.dropout(x)
return x
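
A minimal usage sketch (not part of the original gist): the batch size, sequence length, and embedding dimension below are arbitrary values chosen for illustration; the point is that the module maps a [batch_size, seq_len, d] tensor back to the same shape, so it can be applied position-wise inside a transformer block.

    import torch

    # Hypothetical sizes, for illustration only.
    batch_size, seq_len, d = 2, 8, 64

    mlp = MLP(d, bias=False, dropout=0.2)
    x = torch.randn(batch_size, seq_len, d)

    out = mlp(x)
    print(out.shape)  # torch.Size([2, 8, 64]) -- output shape matches input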