From f816ffbd047346bce6353ec686ff26f6e1c4d7c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=92=D0=BE?= =?UTF-8?q?=D0=BB=D0=BE=D0=B1=D1=83=D0=B5=D0=B2?= <77577658+simon100500@users.noreply.github.com> Date: Mon, 16 Mar 2026 00:26:19 +0300 Subject: [PATCH] fix(pipeline): handle duplicate finish_reason chunks from OpenRouter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some OpenRouter model providers (e.g. google/gemini-3.1-flash-lite-preview) send two consecutive SSE chunks with finish_reason='tool_calls'. The second chunk arrives after streamingToolCallParser.reset() has been called, so it carries empty parts — no functionCall entries. The original handleChunkMerging treated every finish chunk as authoritative and overwrote pendingFinishResponse, discarding the functionCall parts that were correctly assembled from the first finish chunk. Fix: when a second finish chunk arrives and a pendingFinishResponse already exists, only merge usageMetadata (if present) and keep the candidates from the first finish chunk. --- .../core/openaiContentGenerator/pipeline.ts | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/packages/core/src/core/openaiContentGenerator/pipeline.ts b/packages/core/src/core/openaiContentGenerator/pipeline.ts index 5c6cdc6824..4e2d42bd80 100644 --- a/packages/core/src/core/openaiContentGenerator/pipeline.ts +++ b/packages/core/src/core/openaiContentGenerator/pipeline.ts @@ -255,9 +255,23 @@ export class ContentGenerationPipeline { .candidates?.[0]?.finishReason; if (isFinishChunk) { - // This is a finish reason chunk - collectedGeminiResponses.push(response); - setPendingFinish(response); + if (hasPendingFinish) { + // Duplicate finish chunk (e.g. from OpenRouter providers that send two + // finish_reason chunks for tool calls). The streaming tool call parser + // was already reset after the first finish chunk, so the second one + // carries no functionCall parts. Merge only usageMetadata and keep the + // candidates (including functionCall parts) from the first finish chunk. + const lastResponse = + collectedGeminiResponses[collectedGeminiResponses.length - 1]; + if (response.usageMetadata) { + lastResponse.usageMetadata = response.usageMetadata; + } + setPendingFinish(lastResponse); + } else { + // This is a finish reason chunk + collectedGeminiResponses.push(response); + setPendingFinish(response); + } return false; // Don't yield yet, wait for potential subsequent chunks to merge } else if (hasPendingFinish) { // We have a pending finish chunk, merge this chunk's data into it