allenai · davidbrandfonbrener · Mar 26, 2024 · Mar 24, 2024 · Mar 25, 2024 · Mar 25, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Don't log garbage on nodes that aren't rank 0
 - Don't crash in the HF code when we are referring to a tokenizer in a local file
+- Fixed the size calculation for qk layer norm
 
 ## [v0.2.5](https://github.com/allenai/OLMo/releases/tag/v0.2.5) - 2024-03-06
 

diff --git a/olmo/model.py b/olmo/model.py
@@ -425,10 +425,10 @@ def __init__(self, layer_id: int, config: ModelConfig, cache: BufferCache):
         self.k_norm: Optional[LayerNormBase] = None
         self.q_norm: Optional[LayerNormBase] = None
         if config.attention_layer_norm:
-            assert config.n_kv_heads is not None
+            assert config.effective_n_kv_heads is not None
             self.k_norm = LayerNormBase.build(
                 config,
-                size=config.d_model // config.effective_n_kv_heads,
+                size=(config.d_model // config.n_heads) * config.effective_n_kv_heads,
                 elementwise_affine=config.attention_layer_norm_with_affine,
             )
             self.q_norm = LayerNormBase.build(config, elementwise_affine=config.attention_layer_norm_with_affine)