diff --git a/packages/core/src/services/chatCompressionService.test.ts b/packages/core/src/services/chatCompressionService.test.ts index f3f490214d..55a1e2723c 100644 --- a/packages/core/src/services/chatCompressionService.test.ts +++ b/packages/core/src/services/chatCompressionService.test.ts @@ -100,6 +100,173 @@ describe('findCompressSplitPoint', () => { ]; expect(findCompressSplitPoint(historyWithEmptyParts, 0.5)).toBe(2); }); + + it('should compress everything when last message is a functionResponse', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix this bug' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'readFile', + response: { result: 'file content' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'writeFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'writeFile', + response: { result: 'ok' }, + }, + }, + ], + }, + ]; + // Last message is functionResponse -> safe to compress everything + expect(findCompressSplitPoint(history, 0.7)).toBe(5); + }); + + it('should return primary split point when tool completions have no subsequent regular user message', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix this' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'read1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read1', + response: { result: 'a'.repeat(1000) }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'read2', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read2', + response: { result: 'b'.repeat(1000) }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'write1', args: {} } }], + }, + ]; + // Only one non-functionResponse user message (index 0) -> lastSplitPoint=0 + // Last message has functionCall -> can't compress everything + // historyToKeep must start with a regular user message, so split at 0 + // (compress nothing) is the only valid option. + expect(findCompressSplitPoint(history, 0.7)).toBe(0); + }); + + it('should prefer primary split point when tool completions yield no valid user-starting split', () => { + const longContent = 'a'.repeat(10000); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix bug A' }] }, + { role: 'model', parts: [{ text: 'OK' }] }, + { role: 'user', parts: [{ text: 'Fix bug B' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'read1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read1', + response: { result: longContent }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'read2', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read2', + response: { result: longContent }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'write1', args: {} } }], + }, + ]; + // Primary split points at 0 and 2 (regular user messages before the bulky tool outputs) + // Last message has functionCall -> can't compress everything + // Should return lastSplitPoint=2 (last valid primary split point) + expect(findCompressSplitPoint(history, 0.7)).toBe(2); + }); + + it('should still prefer primary split point when it is better', () => { + const history: Content[] = [ + { role: 'user', parts: [{ text: 'msg1' }] }, + { role: 'model', parts: [{ text: 'resp1' }] }, + { + role: 'user', + parts: [{ text: 'msg2 with some substantial content here' }], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool1', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'tool1', + response: { result: 'short' }, + }, + }, + ], + }, + { role: 'user', parts: [{ text: 'msg3' }] }, + { role: 'model', parts: [{ text: 'resp3' }] }, + { role: 'user', parts: [{ text: 'msg4' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'tool2', args: {} } }], + }, + ]; + // Primary split points: 0, 2, 5, 7 + // Last message has functionCall -> can't compress everything + // At 0.99 fraction, lastSplitPoint should be 7 + expect(findCompressSplitPoint(history, 0.99)).toBe(7); + }); }); describe('ChatCompressionService', () => { @@ -240,6 +407,47 @@ describe('ChatCompressionService', () => { expect(tokenLimit).not.toHaveBeenCalled(); }); + it('should return NOOP when historyToCompress is below MIN_COMPRESSION_FRACTION of total', async () => { + // Construct a history where the split point lands on the 2nd regular user + // message (index 2), but indices 0-1 are tiny relative to the huge content + // at index 2. historyToCompress = [0,1] will be << 5% of totalCharCount. + const hugeContent = 'x'.repeat(100000); + const history: Content[] = [ + { role: 'user', parts: [{ text: 'hello' }] }, + { role: 'model', parts: [{ text: 'world' }] }, + // Huge user message pushes the cumulative well past the split threshold + { role: 'user', parts: [{ text: hugeContent }] }, + // Pending functionCall prevents returning contents.length, + // so the fallback split at index 2 is used + { + role: 'model', + parts: [{ functionCall: { name: 'process', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue(100); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn(); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + // force=true bypasses the token threshold gate so we exercise the 5% guard + const result = await service.compress( + mockChat, + mockPromptId, + true, + mockModel, + mockConfig, + false, + ); + + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + expect(mockGenerateContent).not.toHaveBeenCalled(); + }); + it('should compress if over token threshold', async () => { const history: Content[] = [ { role: 'user', parts: [{ text: 'msg1' }] }, @@ -934,4 +1142,149 @@ describe('ChatCompressionService', () => { expect(mockFirePreCompactEvent).not.toHaveBeenCalled(); }); }); + + describe('orphaned trailing funcCall handling', () => { + it('should compress everything when force=true and last message is an orphaned funcCall', async () => { + // Issue #2647: tool-heavy conversation interrupted/crashed while a tool + // was still running. The funcCall will never get a response since the agent + // is idle. Manual /compress strips the orphaned funcCall, then compresses + // the remaining history normally. + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'glob', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'glob', + response: { result: 'files...' }, + }, + }, + ], + }, + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'readFile', + response: { result: 'code...' }, + }, + }, + ], + }, + // orphaned funcCall — agent was interrupted before getting a response + { + role: 'model', + parts: [{ functionCall: { name: 'editFile', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 100, + ); + vi.mocked(tokenLimit).mockReturnValue(1000); + + const mockGenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { content: { parts: [{ text: 'Summary of all work done' }] } }, + ], + usageMetadata: { + promptTokenCount: 1100, + candidatesTokenCount: 50, + totalTokenCount: 1150, + }, + } as unknown as GenerateContentResponse); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + true, // force=true (manual /compress) + mockModel, + mockConfig, + false, + ); + + // Should compress successfully — orphaned funcCall is stripped first, then + // normal compression runs on the remaining history, historyToKeep is empty + expect(result.info.compressionStatus).toBe(CompressionStatus.COMPRESSED); + expect(result.newHistory).not.toBeNull(); + // Reconstructed history: [User(summary), Model("Got it...")] — valid structure + expect(result.newHistory).toHaveLength(2); + expect(result.newHistory![0].role).toBe('user'); + expect(result.newHistory![1].role).toBe('model'); + // The orphaned funcCall is stripped before compression, so only the first 5 + // messages are sent, plus the compression instruction (+1) = history.length total. + const callArg = mockGenerateContent.mock.calls[0][0]; + expect(callArg.contents.length).toBe(history.length); // (history.length - 1) messages + 1 instruction + }); + + it('should NOT compress orphaned funcCall when force=false (auto-compress)', async () => { + // Auto-compress fires BEFORE the matching funcResponse is sent back to the + // model. Compressing the funcCall away would orphan the upcoming funcResponse + // and cause an API error. So force=false must NOT take this path. + const history: Content[] = [ + { role: 'user', parts: [{ text: 'Fix all TypeScript errors.' }] }, + { + role: 'model', + parts: [{ functionCall: { name: 'glob', args: {} } }], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'glob', + response: { result: 'files...' }, + }, + }, + ], + }, + // Pending funcCall: tool is currently executing, funcResponse is coming + { + role: 'model', + parts: [{ functionCall: { name: 'readFile', args: {} } }], + }, + ]; + vi.mocked(mockChat.getHistory).mockReturnValue(history); + // Use a token count above threshold to ensure auto-compress isn't skipped + vi.mocked(uiTelemetryService.getLastPromptTokenCount).mockReturnValue( + 800, + ); + vi.mocked(mockConfig.getContentGeneratorConfig).mockReturnValue({ + model: 'gemini-pro', + contextWindowSize: 1000, + } as unknown as ReturnType); + + const mockGenerateContent = vi.fn(); + vi.mocked(mockConfig.getContentGenerator).mockReturnValue({ + generateContent: mockGenerateContent, + } as unknown as ContentGenerator); + + const result = await service.compress( + mockChat, + mockPromptId, + false, // force=false (auto-compress) + mockModel, + mockConfig, + false, + ); + + // Must return NOOP — compressing would orphan the upcoming funcResponse + expect(result.info.compressionStatus).toBe(CompressionStatus.NOOP); + expect(result.newHistory).toBeNull(); + expect(mockGenerateContent).not.toHaveBeenCalled(); + }); + }); }); diff --git a/packages/core/src/services/chatCompressionService.ts b/packages/core/src/services/chatCompressionService.ts index 610445fb42..528a57b446 100644 --- a/packages/core/src/services/chatCompressionService.ts +++ b/packages/core/src/services/chatCompressionService.ts @@ -29,6 +29,13 @@ export const COMPRESSION_TOKEN_THRESHOLD = 0.7; */ export const COMPRESSION_PRESERVE_THRESHOLD = 0.3; +/** + * Minimum fraction of history (by character count) that must be compressible + * to proceed with a compression API call. Prevents futile calls where the + * model receives almost no context and generates a useless summary. + */ +export const MIN_COMPRESSION_FRACTION = 0.05; + /** * Returns the index of the oldest item to keep when compressing. May return * contents.length which indicates that everything should be compressed. @@ -72,8 +79,15 @@ export function findCompressSplitPoint( ) { return contents.length; } + // Also safe to compress everything if the last message completes a tool call + // sequence (all function calls have matching responses). + if ( + lastContent?.role === 'user' && + lastContent?.parts?.some((part) => !!part.functionResponse) + ) { + return contents.length; + } - // Can't compress everything so just compress at last splitpoint. return lastSplitPoint; } @@ -138,13 +152,31 @@ export class ChatCompressionService { } } + // For manual /compress (force=true), if the last message is an orphaned model + // funcCall (agent interrupted/crashed before the response arrived), strip it + // before computing the split point. After stripping, the history ends cleanly + // (typically with a user funcResponse) and findCompressSplitPoint handles it + // through its normal logic — no special-casing needed. + // + // auto-compress (force=false) must NOT strip: it fires inside + // sendMessageStream() before the matching funcResponse is pushed onto the + // history, so the trailing funcCall is still active, not orphaned. + const lastMessage = curatedHistory[curatedHistory.length - 1]; + const hasOrphanedFuncCall = + force && + lastMessage?.role === 'model' && + lastMessage.parts?.some((p) => !!p.functionCall); + const historyForSplit = hasOrphanedFuncCall + ? curatedHistory.slice(0, -1) + : curatedHistory; + const splitPoint = findCompressSplitPoint( - curatedHistory, + historyForSplit, 1 - COMPRESSION_PRESERVE_THRESHOLD, ); - const historyToCompress = curatedHistory.slice(0, splitPoint); - const historyToKeep = curatedHistory.slice(splitPoint); + const historyToCompress = historyForSplit.slice(0, splitPoint); + const historyToKeep = historyForSplit.slice(splitPoint); if (historyToCompress.length === 0) { return { @@ -157,6 +189,35 @@ export class ChatCompressionService { }; } + // Guard: if historyToCompress is too small relative to the total history, + // skip compression. This prevents futile API calls where the model receives + // almost no context and generates a useless "summary" that inflates tokens. + // + // Note: findCompressSplitPoint already computes charCounts internally but + // returns only the split index. We intentionally recompute here to keep + // the function signature simple; this is a minor, acceptable duplication. + const compressCharCount = historyToCompress.reduce( + (sum, c) => sum + JSON.stringify(c).length, + 0, + ); + const totalCharCount = historyForSplit.reduce( + (sum, c) => sum + JSON.stringify(c).length, + 0, + ); + if ( + totalCharCount > 0 && + compressCharCount / totalCharCount < MIN_COMPRESSION_FRACTION + ) { + return { + newHistory: null, + info: { + originalTokenCount, + newTokenCount: originalTokenCount, + compressionStatus: CompressionStatus.NOOP, + }, + }; + } + const summaryResponse = await config.getContentGenerator().generateContent( { model,