From 167f2b89d2f11146e889505a2cdd91baee6c6b24 Mon Sep 17 00:00:00 2001 From: gustipardo Date: Mon, 2 Mar 2026 21:09:36 -0300 Subject: [PATCH 1/4] fix(gemini): ensure image token accumulation in usage metadata Fixed an issue where image tokens were being overwritten instead of accumulated in Gemini responses. Added support for both camelCase and snake_case token count keys. Fixes #22082. --- excalidraw.log | 4 + .../gemini/image_generation/transformation.py | 10 +-- .../vertex_and_google_ai_studio_gemini.py | 66 +++++++++------ package-lock.json | 82 ++++++------------- scripts/repro_gemini_image_cost.py | 50 +++++++++++ 5 files changed, 122 insertions(+), 90 deletions(-) create mode 100644 excalidraw.log create mode 100644 scripts/repro_gemini_image_cost.py diff --git a/excalidraw.log b/excalidraw.log new file mode 100644 index 00000000000..1bc8507c2e1 --- /dev/null +++ b/excalidraw.log @@ -0,0 +1,4 @@ +2026-03-02 19:58:29.537 [info] Starting Excalidraw MCP server... +2026-03-02 19:58:29.537 [debug] Connecting to stdio transport... +2026-03-02 19:58:29.538 [info] Excalidraw MCP server running on stdio +2026-03-02 19:58:29.543 [info] Listing available tools diff --git a/litellm/llms/gemini/image_generation/transformation.py b/litellm/llms/gemini/image_generation/transformation.py index 73aef15e4c7..4b6be382993 100644 --- a/litellm/llms/gemini/image_generation/transformation.py +++ b/litellm/llms/gemini/image_generation/transformation.py @@ -92,12 +92,12 @@ def _transform_image_usage(self, usage_metadata: dict) -> ImageUsage: tokens_details = usage_metadata.get("promptTokensDetails", []) for details in tokens_details: if isinstance(details, dict): - modality = details.get("modality") - token_count = details.get("tokenCount", 0) + modality = str(details.get("modality", "")).upper() + token_count = details.get("tokenCount", details.get("token_count", 0)) if modality == "TEXT": - input_tokens_details.text_tokens = token_count + input_tokens_details.text_tokens += token_count elif modality == "IMAGE": - input_tokens_details.image_tokens = token_count + input_tokens_details.image_tokens += token_count return ImageUsage( input_tokens=usage_metadata.get("promptTokenCount", 0), @@ -274,4 +274,4 @@ def transform_image_generation_response( b64_json=prediction.get("bytesBase64Encoded", None), url=None, # Google AI returns base64, not URLs )) - return model_response \ No newline at end of file + return model_response diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index 0905f22362e..576d8389b86 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -1632,10 +1632,12 @@ def _calculate_usage( # noqa: PLR0915 if "responseTokensDetails" in usage_metadata: response_tokens_details = CompletionTokensDetailsWrapper() for detail in usage_metadata["responseTokensDetails"]: - if detail["modality"] == "TEXT": - response_tokens_details.text_tokens = detail.get("tokenCount", 0) - elif detail["modality"] == "AUDIO": - response_tokens_details.audio_tokens = detail.get("tokenCount", 0) + modality = str(detail.get("modality", "")).upper() + token_count = detail.get("tokenCount", detail.get("token_count", 0)) + if modality == "TEXT": + response_tokens_details.text_tokens = token_count + elif modality == "AUDIO": + response_tokens_details.audio_tokens = token_count 
######################################################### @@ -1644,16 +1646,24 @@ def _calculate_usage( # noqa: PLR0915 if response_tokens_details is None: response_tokens_details = CompletionTokensDetailsWrapper() for detail in usage_metadata["candidatesTokensDetails"]: - modality = detail.get("modality") - token_count = detail.get("tokenCount", 0) + modality = str(detail.get("modality", "")).upper() + token_count = detail.get("tokenCount", detail.get("token_count", 0)) if modality == "TEXT": - response_tokens_details.text_tokens = token_count + response_tokens_details.text_tokens = ( + response_tokens_details.text_tokens or 0 + ) + token_count elif modality == "AUDIO": - response_tokens_details.audio_tokens = token_count + response_tokens_details.audio_tokens = ( + response_tokens_details.audio_tokens or 0 + ) + token_count elif modality == "IMAGE": - response_tokens_details.image_tokens = token_count + response_tokens_details.image_tokens = ( + response_tokens_details.image_tokens or 0 + ) + token_count elif modality == "VIDEO": - response_tokens_details.video_tokens = token_count + response_tokens_details.video_tokens = ( + response_tokens_details.video_tokens or 0 + ) + token_count # Calculate text_tokens if not explicitly provided in candidatesTokensDetails # candidatesTokenCount includes all modalities, so: text = total - (image + audio + video) @@ -1677,14 +1687,16 @@ def _calculate_usage( # noqa: PLR0915 ## Parse promptTokensDetails (total tokens by modality, includes cached + non-cached) if "promptTokensDetails" in usage_metadata: for detail in usage_metadata["promptTokensDetails"]: - if detail["modality"] == "AUDIO": - prompt_audio_tokens = detail.get("tokenCount", 0) - elif detail["modality"] == "TEXT": - prompt_text_tokens = detail.get("tokenCount", 0) - elif detail["modality"] == "IMAGE": - prompt_image_tokens = detail.get("tokenCount", 0) - elif detail["modality"] == "VIDEO": - prompt_video_tokens = detail.get("tokenCount", 0) + modality = str(detail.get("modality", "")).upper() + token_count = detail.get("tokenCount", detail.get("token_count", 0)) + if modality == "AUDIO": + prompt_audio_tokens = (prompt_audio_tokens or 0) + token_count + elif modality == "TEXT": + prompt_text_tokens = (prompt_text_tokens or 0) + token_count + elif modality == "IMAGE": + prompt_image_tokens = (prompt_image_tokens or 0) + token_count + elif modality == "VIDEO": + prompt_video_tokens = (prompt_video_tokens or 0) + token_count ## Parse cacheTokensDetails (breakdown of cached tokens by modality) ## When explicit caching is used, Gemini provides this field to show which modalities were cached @@ -1695,14 +1707,16 @@ def _calculate_usage( # noqa: PLR0915 if "cacheTokensDetails" in usage_metadata: for detail in usage_metadata["cacheTokensDetails"]: - if detail["modality"] == "AUDIO": - cached_audio_tokens = detail.get("tokenCount", 0) - elif detail["modality"] == "TEXT": - cached_text_tokens = detail.get("tokenCount", 0) - elif detail["modality"] == "IMAGE": - cached_image_tokens = detail.get("tokenCount", 0) - elif detail["modality"] == "VIDEO": - cached_video_tokens = detail.get("tokenCount", 0) + modality = str(detail.get("modality", "")).upper() + token_count = detail.get("tokenCount", detail.get("token_count", 0)) + if modality == "AUDIO": + cached_audio_tokens = (cached_audio_tokens or 0) + token_count + elif modality == "TEXT": + cached_text_tokens = (cached_text_tokens or 0) + token_count + elif modality == "IMAGE": + cached_image_tokens = (cached_image_tokens or 0) + token_count + elif 
modality == "VIDEO": + cached_video_tokens = (cached_video_tokens or 0) + token_count ## Calculate non-cached tokens by subtracting cached from total (per modality) ## This is necessary because promptTokensDetails includes both cached and non-cached tokens diff --git a/package-lock.json b/package-lock.json index 9e1314debcc..7f3ab1773c1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,5 +1,5 @@ { - "name": "litellm", + "name": "op-litellm", "lockfileVersion": 3, "requires": true, "packages": { @@ -54,6 +54,7 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -529,29 +530,6 @@ "dev": true, "license": "MIT" }, - "node_modules/@isaacs/balanced-match": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz", - "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": "20 || >=22" - } - }, - "node_modules/@isaacs/brace-expansion": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz", - "integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==", - "dev": true, - "license": "MIT", - "dependencies": { - "@isaacs/balanced-match": "^4.0.1" - }, - "engines": { - "node": "20 || >=22" - } - }, "node_modules/@istanbuljs/load-nyc-config": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", @@ -1222,6 +1200,7 @@ "integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.2.2" @@ -1497,11 +1476,14 @@ } }, "node_modules/balanced-match": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", - "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", + "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", "dev": true, - "license": "MIT" + "license": "MIT", + "engines": { + "node": "18 || 20 || >=22" + } }, "node_modules/baseline-browser-mapping": { "version": "2.8.30", @@ -1514,14 +1496,16 @@ } }, "node_modules/brace-expansion": { - "version": "1.1.12", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", - "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz", + "integrity": "sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==", "dev": true, "license": "MIT", "dependencies": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" + "balanced-match": "^4.0.2" + }, + "engines": { + "node": "18 || 20 || >=22" } }, "node_modules/braces": { @@ -1557,6 +1541,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", 
@@ -1791,13 +1776,6 @@ "dev": true, "license": "MIT" }, - "node_modules/concat-map": { - "version": "0.0.1", - "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", - "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", - "dev": true, - "license": "MIT" - }, "node_modules/convert-source-map": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", @@ -4090,16 +4068,16 @@ } }, "node_modules/minimatch": { - "version": "10.1.1", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz", - "integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==", + "version": "10.2.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", + "integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==", "dev": true, "license": "BlueOak-1.0.0", "dependencies": { - "@isaacs/brace-expansion": "^5.0.0" + "brace-expansion": "^5.0.2" }, "engines": { - "node": "20 || >=22" + "node": "18 || 20 || >=22" }, "funding": { "url": "https://github.com/sponsors/isaacs" @@ -4712,7 +4690,6 @@ "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "loose-envify": "^1.1.0" } @@ -5068,19 +5045,6 @@ "node": ">=8" } }, - "node_modules/test-exclude/node_modules/minimatch": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", - "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", - "dev": true, - "license": "ISC", - "dependencies": { - "brace-expansion": "^1.1.7" - }, - "engines": { - "node": "*" - } - }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", diff --git a/scripts/repro_gemini_image_cost.py b/scripts/repro_gemini_image_cost.py new file mode 100644 index 00000000000..d2669174ab5 --- /dev/null +++ b/scripts/repro_gemini_image_cost.py @@ -0,0 +1,50 @@ +import os +import litellm +from litellm.llms.gemini.image_generation.transformation import GoogleImageGenConfig +from litellm.types.utils import ImageObject, ImageResponse + +os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" +litellm.model_cost = litellm.get_model_cost_map(url="") + +model = "gemini/gemini-3-pro-image-preview" +config = GoogleImageGenConfig() + +usage_metadata = { + "promptTokenCount": 200, + "candidatesTokenCount": 0, + "totalTokenCount": 200, + "promptTokensDetails": [ + {"modality": "TEXT", "tokenCount": 10}, + {"modality": "IMAGE", "tokenCount": 90}, + {"modality": "IMAGE", "tokenCount": 100}, + ], +} + +parsed_usage = config._transform_image_usage(usage_metadata) +resp = ImageResponse( + data=[ImageObject(b64_json="fake_image_data")], + usage=parsed_usage, +) + +observed_cost = litellm.completion_cost( + completion_response=resp, + model=model, + custom_llm_provider="gemini", +) + +model_info = litellm.get_model_info(model=model, custom_llm_provider="gemini") +input_cost_per_token = model_info["input_cost_per_token"] + +expected_prompt_tokens = 10 + 90 + 100 +expected_prompt_cost = expected_prompt_tokens * input_cost_per_token + +print(f"\n--- Results for {model} ---") +print(f"Input Tokens (Total): {parsed_usage.input_tokens}") +print(f"Image Tokens: 
{parsed_usage.input_tokens_details.image_tokens}") +print(f"Observed Cost: ${observed_cost:.8f}") +print(f"Expected Cost: ${expected_prompt_cost:.8f}") + +if abs(observed_cost - expected_prompt_cost) < 1e-10: + print("\n✅ SUCCESS: The fix works! Tokens are accumulated correctly.") +else: + print("\n❌ FAILED: The cost or tokens don't match.") From 49adc6352b3555495ef7b8539c3466b707f552bd Mon Sep 17 00:00:00 2001 From: gustipardo Date: Mon, 2 Mar 2026 21:35:11 -0300 Subject: [PATCH 2/4] test: add regression test for image token accumulation and cleanup files --- excalidraw.log | 4 - package-lock.json | 82 +++++++++++++------ scripts/repro_gemini_image_cost.py | 50 ----------- .../test_gemini_image_usage.py | 55 +++++++++++++ 4 files changed, 114 insertions(+), 77 deletions(-) delete mode 100644 excalidraw.log delete mode 100644 scripts/repro_gemini_image_cost.py diff --git a/excalidraw.log b/excalidraw.log deleted file mode 100644 index 1bc8507c2e1..00000000000 --- a/excalidraw.log +++ /dev/null @@ -1,4 +0,0 @@ -2026-03-02 19:58:29.537 [info] Starting Excalidraw MCP server... -2026-03-02 19:58:29.537 [debug] Connecting to stdio transport... -2026-03-02 19:58:29.538 [info] Excalidraw MCP server running on stdio -2026-03-02 19:58:29.543 [info] Listing available tools diff --git a/package-lock.json b/package-lock.json index 7f3ab1773c1..9e1314debcc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,5 +1,5 @@ { - "name": "op-litellm", + "name": "litellm", "lockfileVersion": 3, "requires": true, "packages": { @@ -54,7 +54,6 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -530,6 +529,29 @@ "dev": true, "license": "MIT" }, + "node_modules/@isaacs/balanced-match": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz", + "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": "20 || >=22" + } + }, + "node_modules/@isaacs/brace-expansion": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz", + "integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@isaacs/balanced-match": "^4.0.1" + }, + "engines": { + "node": "20 || >=22" + } + }, "node_modules/@istanbuljs/load-nyc-config": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", @@ -1200,7 +1222,6 @@ "integrity": "sha512-cisd7gxkzjBKU2GgdYrTdtQx1SORymWyaAFhaxQPK9bYO9ot3Y5OikQRvY0VYQtvwjeQnizCINJAenh/V7MK2w==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@types/prop-types": "*", "csstype": "^3.2.2" @@ -1476,14 +1497,11 @@ } }, "node_modules/balanced-match": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz", - "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==", + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "dev": true, - 
"license": "MIT", - "engines": { - "node": "18 || 20 || >=22" - } + "license": "MIT" }, "node_modules/baseline-browser-mapping": { "version": "2.8.30", @@ -1496,16 +1514,14 @@ } }, "node_modules/brace-expansion": { - "version": "5.0.4", - "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.4.tgz", - "integrity": "sha512-h+DEnpVvxmfVefa4jFbCf5HdH5YMDXRsmKflpf1pILZWRFlTbJpxeU55nJl4Smt5HQaGzg1o6RHFPJaOqnmBDg==", + "version": "1.1.12", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", + "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==", "dev": true, "license": "MIT", "dependencies": { - "balanced-match": "^4.0.2" - }, - "engines": { - "node": "18 || 20 || >=22" + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" } }, "node_modules/braces": { @@ -1541,7 +1557,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.25", "caniuse-lite": "^1.0.30001754", @@ -1776,6 +1791,13 @@ "dev": true, "license": "MIT" }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true, + "license": "MIT" + }, "node_modules/convert-source-map": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", @@ -4068,16 +4090,16 @@ } }, "node_modules/minimatch": { - "version": "10.2.4", - "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.4.tgz", - "integrity": "sha512-oRjTw/97aTBN0RHbYCdtF1MQfvusSIBQM0IZEgzl6426+8jSC0nF1a/GmnVLpfB9yyr6g6FTqWqiZVbxrtaCIg==", + "version": "10.1.1", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz", + "integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==", "dev": true, "license": "BlueOak-1.0.0", "dependencies": { - "brace-expansion": "^5.0.2" + "@isaacs/brace-expansion": "^5.0.0" }, "engines": { - "node": "18 || 20 || >=22" + "node": "20 || >=22" }, "funding": { "url": "https://github.com/sponsors/isaacs" @@ -4690,6 +4712,7 @@ "integrity": "sha512-UOShsPwz7NrMUqhR6t0hWjFduvOzbtv7toDH1/hIrfRNIDBnnBWd0CwJTGvTpngVlmwGCdP9/Zl/tVrDqcuYzQ==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "loose-envify": "^1.1.0" } @@ -5045,6 +5068,19 @@ "node": ">=8" } }, + "node_modules/test-exclude/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, "node_modules/tmpl": { "version": "1.0.5", "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", diff --git a/scripts/repro_gemini_image_cost.py b/scripts/repro_gemini_image_cost.py deleted file mode 100644 index d2669174ab5..00000000000 --- a/scripts/repro_gemini_image_cost.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -import litellm -from litellm.llms.gemini.image_generation.transformation import GoogleImageGenConfig -from litellm.types.utils import ImageObject, ImageResponse - -os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" -litellm.model_cost = litellm.get_model_cost_map(url="") - -model = 
"gemini/gemini-3-pro-image-preview" -config = GoogleImageGenConfig() - -usage_metadata = { - "promptTokenCount": 200, - "candidatesTokenCount": 0, - "totalTokenCount": 200, - "promptTokensDetails": [ - {"modality": "TEXT", "tokenCount": 10}, - {"modality": "IMAGE", "tokenCount": 90}, - {"modality": "IMAGE", "tokenCount": 100}, - ], -} - -parsed_usage = config._transform_image_usage(usage_metadata) -resp = ImageResponse( - data=[ImageObject(b64_json="fake_image_data")], - usage=parsed_usage, -) - -observed_cost = litellm.completion_cost( - completion_response=resp, - model=model, - custom_llm_provider="gemini", -) - -model_info = litellm.get_model_info(model=model, custom_llm_provider="gemini") -input_cost_per_token = model_info["input_cost_per_token"] - -expected_prompt_tokens = 10 + 90 + 100 -expected_prompt_cost = expected_prompt_tokens * input_cost_per_token - -print(f"\n--- Results for {model} ---") -print(f"Input Tokens (Total): {parsed_usage.input_tokens}") -print(f"Image Tokens: {parsed_usage.input_tokens_details.image_tokens}") -print(f"Observed Cost: ${observed_cost:.8f}") -print(f"Expected Cost: ${expected_prompt_cost:.8f}") - -if abs(observed_cost - expected_prompt_cost) < 1e-10: - print("\n✅ SUCCESS: The fix works! Tokens are accumulated correctly.") -else: - print("\n❌ FAILED: The cost or tokens don't match.") diff --git a/tests/llm_translation/test_gemini_image_usage.py b/tests/llm_translation/test_gemini_image_usage.py index 8c7f05d38e0..0497d7fd9d7 100644 --- a/tests/llm_translation/test_gemini_image_usage.py +++ b/tests/llm_translation/test_gemini_image_usage.py @@ -4,9 +4,11 @@ This test verifies the fix for issue #18323 where image_generation() was returning usage=0 while completion() returned proper token usage. """ +import os import pytest from unittest.mock import patch, MagicMock import litellm +from litellm.llms.gemini.image_generation.transformation import GoogleImageGenConfig from litellm.types.utils import ImageResponse, ImageObject, ImageUsage @@ -211,3 +213,56 @@ def test_gemini_imagen_models_no_usage_extraction(): # For Imagen models, we don't extract usage from the predictions format # This test just ensures we don't crash + + +def test_gemini_image_generation_accumulates_multiple_image_prompt_token_details(): + """ + Regression test: promptTokensDetails can include multiple IMAGE entries. + These must be accumulated instead of overwritten. 
+ """ + previous_local_model_cost_map = os.environ.get("LITELLM_LOCAL_MODEL_COST_MAP") + previous_model_cost = litellm.model_cost + try: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + model = "gemini/gemini-3-pro-image-preview" + config = GoogleImageGenConfig() + + usage_metadata = { + "promptTokenCount": 200, + "candidatesTokenCount": 0, + "totalTokenCount": 200, + "promptTokensDetails": [ + {"modality": "TEXT", "tokenCount": 10}, + {"modality": "IMAGE", "tokenCount": 90}, + {"modality": "IMAGE", "tokenCount": 100}, + ], + } + + parsed_usage = config._transform_image_usage(usage_metadata) + image_response = ImageResponse( + data=[ImageObject(b64_json="fake_image_data")], + usage=parsed_usage, + ) + + observed_cost = litellm.completion_cost( + completion_response=image_response, + model=model, + custom_llm_provider="gemini", + ) + + model_info = litellm.get_model_info(model=model, custom_llm_provider="gemini") + expected_image_tokens = 190 + expected_total_prompt_tokens = 200 + expected_prompt_cost = expected_total_prompt_tokens * model_info["input_cost_per_token"] + + assert parsed_usage.input_tokens_details.image_tokens == expected_image_tokens + assert parsed_usage.input_tokens_details.text_tokens == 10 + assert observed_cost == pytest.approx(expected_prompt_cost, rel=1e-12) + finally: + if previous_local_model_cost_map is None: + os.environ.pop("LITELLM_LOCAL_MODEL_COST_MAP", None) + else: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = previous_local_model_cost_map + litellm.model_cost = previous_model_cost From 36b3cd19fad192590dc6faad940ca8b863b6720d Mon Sep 17 00:00:00 2001 From: gustipardo Date: Mon, 2 Mar 2026 21:50:20 -0300 Subject: [PATCH 3/4] fix(gemini): ensure consistent accumulation for responseTokensDetails --- .../gemini/vertex_and_google_ai_studio_gemini.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index 576d8389b86..18d0f8c1830 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -1635,9 +1635,13 @@ def _calculate_usage( # noqa: PLR0915 modality = str(detail.get("modality", "")).upper() token_count = detail.get("tokenCount", detail.get("token_count", 0)) if modality == "TEXT": - response_tokens_details.text_tokens = token_count + response_tokens_details.text_tokens = ( + response_tokens_details.text_tokens or 0 + ) + token_count elif modality == "AUDIO": - response_tokens_details.audio_tokens = token_count + response_tokens_details.audio_tokens = ( + response_tokens_details.audio_tokens or 0 + ) + token_count ######################################################### From de63dd8d5e1ebc8b76e9b0e4c37418859d243091 Mon Sep 17 00:00:00 2001 From: gustipardo Date: Tue, 3 Mar 2026 12:20:27 -0300 Subject: [PATCH 4/4] fix(gemini): harden token count parsing and add vertex accumulation test Parse tokenCount/token_count as int-safe values to satisfy mypy and avoid None/object arithmetic. Add regression test for duplicate modality accumulation in Vertex _calculate_usage. 
--- .../gemini/image_generation/transformation.py | 5 ++- .../vertex_and_google_ai_studio_gemini.py | 13 +++++-- ...test_vertex_and_google_ai_studio_gemini.py | 37 ++++++++++++++++++- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/litellm/llms/gemini/image_generation/transformation.py b/litellm/llms/gemini/image_generation/transformation.py index 4b6be382993..6716d9a1384 100644 --- a/litellm/llms/gemini/image_generation/transformation.py +++ b/litellm/llms/gemini/image_generation/transformation.py @@ -93,7 +93,10 @@ def _transform_image_usage(self, usage_metadata: dict) -> ImageUsage: for details in tokens_details: if isinstance(details, dict): modality = str(details.get("modality", "")).upper() - token_count = details.get("tokenCount", details.get("token_count", 0)) + raw_token_count = details.get( + "tokenCount", details.get("token_count", 0) + ) + token_count = raw_token_count if isinstance(raw_token_count, int) else 0 if modality == "TEXT": input_tokens_details.text_tokens += token_count elif modality == "IMAGE": diff --git a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py index 18d0f8c1830..2cec30dd39a 100644 --- a/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py +++ b/litellm/llms/vertex_ai/gemini/vertex_and_google_ai_studio_gemini.py @@ -1623,6 +1623,11 @@ def _calculate_usage( # noqa: PLR0915 response_tokens: Optional[int] = None response_tokens_details: Optional[CompletionTokensDetailsWrapper] = None usage_metadata = completion_response["usageMetadata"] + + def _get_token_count(detail: dict) -> int: + raw_token_count = detail.get("tokenCount", detail.get("token_count", 0)) + return raw_token_count if isinstance(raw_token_count, int) else 0 + if "cachedContentTokenCount" in usage_metadata: cached_tokens = usage_metadata["cachedContentTokenCount"] @@ -1633,7 +1638,7 @@ def _calculate_usage( # noqa: PLR0915 response_tokens_details = CompletionTokensDetailsWrapper() for detail in usage_metadata["responseTokensDetails"]: modality = str(detail.get("modality", "")).upper() - token_count = detail.get("tokenCount", detail.get("token_count", 0)) + token_count = _get_token_count(detail) if modality == "TEXT": response_tokens_details.text_tokens = ( response_tokens_details.text_tokens or 0 @@ -1651,7 +1656,7 @@ def _calculate_usage( # noqa: PLR0915 response_tokens_details = CompletionTokensDetailsWrapper() for detail in usage_metadata["candidatesTokensDetails"]: modality = str(detail.get("modality", "")).upper() - token_count = detail.get("tokenCount", detail.get("token_count", 0)) + token_count = _get_token_count(detail) if modality == "TEXT": response_tokens_details.text_tokens = ( response_tokens_details.text_tokens or 0 @@ -1692,7 +1697,7 @@ def _calculate_usage( # noqa: PLR0915 if "promptTokensDetails" in usage_metadata: for detail in usage_metadata["promptTokensDetails"]: modality = str(detail.get("modality", "")).upper() - token_count = detail.get("tokenCount", detail.get("token_count", 0)) + token_count = _get_token_count(detail) if modality == "AUDIO": prompt_audio_tokens = (prompt_audio_tokens or 0) + token_count elif modality == "TEXT": @@ -1712,7 +1717,7 @@ def _calculate_usage( # noqa: PLR0915 if "cacheTokensDetails" in usage_metadata: for detail in usage_metadata["cacheTokensDetails"]: modality = str(detail.get("modality", "")).upper() - token_count = detail.get("tokenCount", detail.get("token_count", 0)) + token_count = _get_token_count(detail) if 
modality == "AUDIO": cached_audio_tokens = (cached_audio_tokens or 0) + token_count elif modality == "TEXT": diff --git a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py index 8beb19bf1ac..0f8ae715439 100644 --- a/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py +++ b/tests/test_litellm/llms/vertex_ai/gemini/test_vertex_and_google_ai_studio_gemini.py @@ -862,6 +862,42 @@ def test_vertex_ai_usage_metadata_with_image_tokens_in_prompt(): ) +def test_vertex_ai_usage_metadata_accumulates_duplicate_modalities(): + """Ensure _calculate_usage accumulates repeated modality entries.""" + v = VertexGeminiConfig() + usage_metadata = { + "promptTokenCount": 210, + "candidatesTokenCount": 50, + "totalTokenCount": 260, + "promptTokensDetails": [ + {"modality": "TEXT", "tokenCount": 20}, + {"modality": "IMAGE", "tokenCount": 90}, + {"modality": "IMAGE", "token_count": 100}, + ], + "candidatesTokensDetails": [ + {"modality": "IMAGE", "tokenCount": 30}, + {"modality": "TEXT", "tokenCount": 15}, + {"modality": "TEXT", "token_count": 5}, + ], + "cacheTokensDetails": [ + {"modality": "TEXT", "tokenCount": 4}, + {"modality": "IMAGE", "tokenCount": 40}, + {"modality": "IMAGE", "token_count": 10}, + ], + } + usage_metadata = UsageMetadata(**usage_metadata) + result = v._calculate_usage(completion_response={"usageMetadata": usage_metadata}) + + # prompt details are total - cached per modality + assert result.prompt_tokens_details.text_tokens == 16 # 20 - 4 + assert result.prompt_tokens_details.image_tokens == 140 # (90 + 100) - (40 + 10) + + # candidates details accumulate duplicate modalities + assert result.completion_tokens_details.text_tokens == 20 # 15 + 5 + assert result.completion_tokens_details.image_tokens == 30 + assert result.completion_tokens == 50 + + def test_vertex_ai_map_thinking_param_with_budget_tokens_0(): """ If budget_tokens is 0, do not set includeThoughts to True @@ -3723,4 +3759,3 @@ def test_vertex_ai_usage_metadata_video_tokens_with_caching(): "Prompt video tokens should be 10240 - 5120 (cached) = 5120" assert result.prompt_tokens_details.text_tokens == 9 assert result.prompt_tokens_details.audio_tokens == 200 -