HabanaAI · michalkuligowski · Jul 10, 2025 · Jul 9, 2025 · Jul 10, 2025 · Jul 10, 2025
@@ -90,6 +90,7 @@
 
 DUMMY_TOKEN_ID = -1
 UNSET_IMG_ARGS = 9999999
+shutdown_inc_called = False  # set True by the first shutdown_inc() call
 
 
 class PhaseType(Enum):
@@ -3280,10 +3281,17 @@ def finish_measurements(self):
         finalize_calibration(self.model.model)
 
     def shutdown_inc(self):
-        can_finalize_inc = self._is_quant_with_inc() and \
-            (self.model.model is not None) and \
-            self.inc_initialized_successfully and \
-            not getattr(self, "_is_inc_finalized", False)
+        global shutdown_inc_called
+        if shutdown_inc_called:
+            return
+        shutdown_inc_called = True
+        can_finalize_inc = False
+        from contextlib import suppress
+        with suppress(AttributeError):
+            can_finalize_inc = (self._is_quant_with_inc()
+                                and (self.model.model is not None)
+                                and self.inc_initialized_successfully and
+                                not getattr(self, "_is_inc_finalized", False))
         if can_finalize_inc:
             from neural_compressor.torch.quantization import (
                 finalize_calibration)
@@ -4169,6 +4177,10 @@ def _make_decode_output(
         return SamplerOutput(sampler_outputs)
 
     def shutdown_inc(self):
+        global shutdown_inc_called
+        if shutdown_inc_called:
+            return
+        shutdown_inc_called = True
         can_finalize_inc = False
         from contextlib import suppress
         with suppress(AttributeError):

@@ -506,7 +506,7 @@ def list_prompt_adapters(self) -> Set[int]:
             "Prompt Adapter is not implemented for HPU backend.")
 
     def shutdown(self):
-        self.model_runner.shutdown_inc()
+        getattr(self.model_runner, 'shutdown_inc', lambda: None)()  # if defined
 
     @property
     def max_model_len(self) -> int: