Source: nlp.seas.harvard.edu/2018/04/01/attention.html#position-wise-feed-forward-networks
import math
import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):
    "Implement the PE function."
    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float) *
                             -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        # Register as a buffer: saved with the model but not a trainable parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        # Add the fixed encoding for the first x.size(1) positions; no gradient flows
        # into the buffer, so the deprecated Variable wrapper is unnecessary.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)
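A quick usage sketch of the module above (the d_model, dropout rate, and dummy input shape are illustrative assumptions, not values from the original post):

pe = PositionalEncoding(d_model=512, dropout=0.1)   # assumed hyperparameters
x = torch.zeros(1, 10, 512)                         # dummy (batch, seq_len, d_model) embeddings
out = pe(x)
print(out.shape)   # torch.Size([1, 10, 512]): same shape, positions added, then dropout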
Positional Encoding
Unlike recurrent neural networks, the Transformer does not consume the input one word at a time; it takes the whole sequence at once. The order information of the input sequence therefore has to be supplied to the model explicitly.
The functions that inject this positional information (even dimensions: $2i$, odd dimensions: $2i+1$):

$$PE_{(pos,\,2i)} = \sin\left(pos / 10000^{2i/d_{model}}\right)$$
$$PE_{(pos,\,2i+1)} = \cos\left(pos / 10000^{2i/d_{model}}\right)$$
The $\sin$ and $\cos$ functions assign a different positional signal to each feature-dimension index depending on the position in the sequence, so every position gets a unique encoding pattern across the dimensions.
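As a sanity check that the log-space trick in `div_term` matches the direct formula above (a minimal sketch; the position and dimension index are arbitrary):

import math

d_model = 512
pos, two_i = 3, 8   # arbitrary position and even feature-dimension index 2i

direct    = math.sin(pos / 10000 ** (two_i / d_model))
log_space = math.sin(pos * math.exp(two_i * -(math.log(10000.0) / d_model)))
print(abs(direct - log_space) < 1e-12)   # True: both forms compute the same encoding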