docling-project · PeterStaar-IBM · Nov 27, 2025 · Oct 23, 2025 · Oct 24, 2025 · Oct 24, 2025
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -20,7 +20,7 @@ env:
     tests/test_asr_pipeline.py
     tests/test_threaded_pipeline.py
   PYTEST_TO_SKIP: |-
-  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|suryaocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping|mlx_whisper_example|gpu_standard_pipeline|gpu_vlm_pipeline|demo_layout_vlm)\.py$'
+  EXAMPLES_TO_SKIP: '^(batch_convert|compare_vlm_models|minimal|minimal_vlm_pipeline|minimal_asr_pipeline|export_multimodal|custom_convert|develop_picture_enrichment|rapidocr_with_custom_models|suryaocr_with_custom_models|offline_convert|pictures_description|pictures_description_api|vlm_pipeline_api_model|granitedocling_repetition_stopping|mlx_whisper_example|gpu_standard_pipeline|gpu_vlm_pipeline|demo_layout_vlm|post_process_ocr_with_vlm)\.py$'
 
 jobs:
   lint:

diff --git a/docling/utils/api_image_request.py b/docling/utils/api_image_request.py
@@ -23,51 +23,72 @@ def api_image_request(
     **params,
 ) -> Tuple[str, Optional[int], VlmStopReason]:
     img_io = BytesIO()
-    image.save(img_io, "PNG")
-    image_base64 = base64.b64encode(img_io.getvalue()).decode("utf-8")
-    messages = [
-        {
-            "role": "user",
-            "content": [
+    image = (
+        image.copy()
+    )  # Fix for inconsistent PIL image width/height to actual byte data
+    image = image.convert("RGBA")
+    good_image = True
+    try:
+        image.save(img_io, "PNG")
+    except Exception as e:
+        good_image = False
+        _log.error(f"Error, corrupter PNG of size: {image.size}: {e}")
+
+    if good_image:
+        try:
+            image_base64 = base64.b64encode(img_io.getvalue()).decode("utf-8")
+
+            messages = [
                 {
-                    "type": "image_url",
-                    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
-                },
-                {
-                    "type": "text",
-                    "text": prompt,
-                },
-            ],
-        }
-    ]
-
-    payload = {
-        "messages": messages,
-        **params,
-    }
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/png;base64,{image_base64}"
+                            },
+                        },
+                        {
+                            "type": "text",
+                            "text": prompt,
+                        },
+                    ],
+                }
+            ]
+
+            payload = {
+                "messages": messages,
+                **params,
+            }
+
+            headers = headers or {}
+
+            r = requests.post(
+                str(url),
+                headers=headers,
+                json=payload,
+                timeout=timeout,
+            )
+            if not r.ok:
+                _log.error(f"Error calling the API. Response was {r.text}")
+                # image.show()
+            # r.raise_for_status()
+
+            api_resp = OpenAiApiResponse.model_validate_json(r.text)
+            generated_text = api_resp.choices[0].message.content.strip()
+            num_tokens = api_resp.usage.total_tokens
+            stop_reason = (
+                VlmStopReason.LENGTH
+                if api_resp.choices[0].finish_reason == "length"
+                else VlmStopReason.END_OF_SEQUENCE
+            )
 
-    headers = headers or {}
-
-    r = requests.post(
-        str(url),
-        headers=headers,
-        json=payload,
-        timeout=timeout,
-    )
-    if not r.ok:
-        _log.error(f"Error calling the API. Response was {r.text}")
-    r.raise_for_status()
-
-    api_resp = OpenAiApiResponse.model_validate_json(r.text)
-    generated_text = api_resp.choices[0].message.content.strip()
-    num_tokens = api_resp.usage.total_tokens
-    stop_reason = (
-        VlmStopReason.LENGTH
-        if api_resp.choices[0].finish_reason == "length"
-        else VlmStopReason.END_OF_SEQUENCE
-    )
-
-    return generated_text, num_tokens, stop_reason
+            return generated_text, num_tokens, stop_reason
+        except Exception as e:
+            _log.error(f"Error, could not process request: {e}")
+            return "", 0, VlmStopReason.UNSPECIFIED
+    else:
+        return "", 0, VlmStopReason.UNSPECIFIED
 
 
 def api_image_request_streaming(