We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 0f9778a, commit db3e52a (Copy full SHA for db3e52a)
1 file changed
transformerXL.py
@@ -75,7 +75,7 @@ def __call__(self, values_keys:jnp.ndarray, queries:jnp.ndarray, pos_embed:jnp.n
75
out = self.dense1(out_attention_n)
76
out = nn.activation.gelu(out)
77
#out = nn.activation.relu(out)
78
- out = self.dense2(out_attention)
+ out = self.dense2(out)
79
if(self.gating):
80
out= self.gate2(out,jax.nn.relu(out_attention))
81
else:
0 commit comments