1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -28,6 +28,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Don't log garbage on nodes that aren't rank 0
- Don't crash in the HF code when we are referring to a tokenizer in a local file
- Fixed the size calculation for qk layer norm
- Fixed a pipeline test failure caused by a bug in transformers version 4.39.1

## [v0.2.5](https://github.com/allenai/OLMo/releases/tag/v0.2.5) - 2024-03-06

4 changes: 4 additions & 0 deletions hf_olmo/modeling_olmo.py
@@ -3,6 +3,7 @@

import torch
from transformers import PreTrainedModel
from transformers.cache_utils import Cache
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.models.auto import AutoModelForCausalLM

@@ -57,6 +58,9 @@ def forward(
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
cache_position: Optional[Cache] = None,  # Hack to mitigate an issue in transformers `4.39.x`: https://github.com/huggingface/transformers/issues/29426
) -> Union[Tuple, CausalLMOutputWithPast]:
if use_cache is None:
use_cache = self.config.use_cache
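For context: the mitigation works by accepting the extra `cache_position` keyword that transformers `4.39.x` began passing to `forward` during generation (see huggingface/transformers#29426) and simply ignoring it. Below is a minimal sketch, with hypothetical function names standing in for the real model `forward`, of the failure mode and why accepting the keyword fixes it:

```python
# Sketch (hypothetical names): transformers 4.39.x may call
# model.forward(..., cache_position=...) during generation, so a forward()
# that does not accept that keyword raises a TypeError.

import torch


def forward_without_kwarg(input_ids: torch.Tensor) -> torch.Tensor:
    # Stand-in for a forward() written before transformers 4.39.x.
    return input_ids


def forward_with_kwarg(input_ids: torch.Tensor, cache_position=None) -> torch.Tensor:
    # The keyword is accepted but deliberately ignored, mirroring the hack above.
    return input_ids


ids = torch.tensor([[1, 2, 3]])

try:
    forward_without_kwarg(ids, cache_position=torch.arange(3))
except TypeError as e:
    print(f"Fails as under transformers 4.39.1: {e}")

print(forward_with_kwarg(ids, cache_position=torch.arange(3)))  # Succeeds.
```

Accepting and discarding the keyword keeps the model compatible with both older and newer transformers releases without depending on the upstream bug being fixed.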