diff --git a/docs/users/configuration/settings.md b/docs/users/configuration/settings.md index f27140fecc..f73d9416cd 100644 --- a/docs/users/configuration/settings.md +++ b/docs/users/configuration/settings.md @@ -196,17 +196,19 @@ The `extra_body` field allows you to add custom parameters to the request body s #### context -| Setting | Type | Description | Default | -| ------------------------------------------------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | -| `context.fileName` | string or array of strings | The name of the context file(s). | `undefined` | -| `context.importFormat` | string | The format to use when importing memory. | `undefined` | -| `context.includeDirectories` | array | Additional directories to include in the workspace context. Specifies an array of additional absolute or relative paths to include in the workspace context. Missing directories will be skipped with a warning by default. Paths can use `~` to refer to the user's home directory. This setting can be combined with the `--include-directories` command-line flag. | `[]` | -| `context.loadFromIncludeDirectories` | boolean | Controls the behavior of the `/memory refresh` command. If set to `true`, `QWEN.md` files should be loaded from all directories that are added. If set to `false`, `QWEN.md` should only be loaded from the current directory. | `false` | -| `context.fileFiltering.respectGitIgnore` | boolean | Respect .gitignore files when searching. | `true` | -| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. 
| `true` | -| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` | -| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` | -| `context.gapThresholdMinutes` | number | Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with typical provider prompt-cache TTL. Set higher if your provider has a longer cache TTL. | `5` | +| Setting | Type | Description | Default | +| -------------------------------------------------------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | +| `context.fileName` | string or array of strings | The name of the context file(s). | `undefined` | +| `context.importFormat` | string | The format to use when importing memory. | `undefined` | +| `context.includeDirectories` | array | Additional directories to include in the workspace context. Specifies an array of additional absolute or relative paths to include in the workspace context. Missing directories will be skipped with a warning by default. Paths can use `~` to refer to the user's home directory. This setting can be combined with the `--include-directories` command-line flag. | `[]` | +| `context.loadFromIncludeDirectories` | boolean | Controls the behavior of the `/memory refresh` command. If set to `true`, `QWEN.md` files should be loaded from all directories that are added. 
If set to `false`, `QWEN.md` should only be loaded from the current directory. | `false` | +| `context.fileFiltering.respectGitIgnore` | boolean | Respect .gitignore files when searching. | `true` | +| `context.fileFiltering.respectQwenIgnore` | boolean | Respect .qwenignore files when searching. | `true` | +| `context.fileFiltering.enableRecursiveFileSearch` | boolean | Whether to enable searching recursively for filenames under the current tree when completing `@` prefixes in the prompt. | `true` | +| `context.fileFiltering.enableFuzzySearch` | boolean | When `true`, enables fuzzy search capabilities when searching for files. Set to `false` to improve performance on projects with a large number of files. | `true` | +| `context.clearContextOnIdle.thinkingThresholdMinutes` | number | Minutes of inactivity before clearing old thinking blocks to free context tokens. The default aligns with the typical provider prompt-cache TTL. Use `-1` to disable. | `5` | +| `context.clearContextOnIdle.toolResultsThresholdMinutes` | number | Minutes of inactivity before clearing old tool result content. Use `-1` to disable. | `60` | +| `context.clearContextOnIdle.toolResultsNumToKeep` | number | Number of most-recent compactable tool results to preserve when clearing. Values below 1 are treated as 1. | `5` | #### Troubleshooting File Search Performance diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts index 2b64685f79..a2073a44c2 100755 --- a/packages/cli/src/config/config.ts +++ b/packages/cli/src/config/config.ts @@ -1068,8 +1068,8 @@ export async function loadCliConfig( }, telemetry: telemetrySettings, usageStatisticsEnabled: settings.privacy?.usageStatisticsEnabled ?? 
true, + clearContextOnIdle: settings.context?.clearContextOnIdle, fileFiltering: settings.context?.fileFiltering, - thinkingIdleThresholdMinutes: settings.context?.gapThresholdMinutes, checkpointing: argv.checkpointing || settings.general?.checkpointing?.enabled, proxy: diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts index 116589dd44..7a0e094412 100644 --- a/packages/cli/src/config/settingsSchema.ts +++ b/packages/cli/src/config/settingsSchema.ts @@ -877,6 +877,48 @@ const SETTINGS_SCHEMA = { description: 'Whether to load memory files from include directories.', showInDialog: false, }, + clearContextOnIdle: { + type: 'object', + label: 'Clear Context On Idle', + category: 'Context', + requiresRestart: false, + default: {}, + description: + 'Settings for clearing stale context after idle periods. Use -1 to disable a threshold.', + showInDialog: false, + properties: { + thinkingThresholdMinutes: { + type: 'number', + label: 'Thinking Idle Threshold (minutes)', + category: 'Context', + requiresRestart: false, + default: 5 as number, + description: + 'Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.', + showInDialog: false, + }, + toolResultsThresholdMinutes: { + type: 'number', + label: 'Tool Results Idle Threshold (minutes)', + category: 'Context', + requiresRestart: false, + default: 60 as number, + description: + 'Minutes of inactivity before clearing old tool result content. Use -1 to disable.', + showInDialog: false, + }, + toolResultsNumToKeep: { + type: 'number', + label: 'Tool Results Number To Keep', + category: 'Context', + requiresRestart: false, + default: 5 as number, + description: + 'Number of most-recent compactable tool results to preserve when clearing. 
Floor at 1.', + showInDialog: false, + }, + }, + }, fileFiltering: { type: 'object', label: 'File Filtering', @@ -924,16 +966,6 @@ const SETTINGS_SCHEMA = { }, }, }, - gapThresholdMinutes: { - type: 'number', - label: 'Thinking Block Idle Threshold (minutes)', - category: 'Context', - requiresRestart: false, - default: 5, - description: - 'Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.', - showInDialog: false, - }, }, }, diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index b262b374f6..791bc0824f 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -209,6 +209,19 @@ export interface ChatCompressionSettings { contextPercentageThreshold?: number; } +/** + * Settings for clearing stale context after idle periods. + * Threshold values of -1 mean "never clear" (disabled). + */ +export interface ClearContextOnIdleSettings { + /** Minutes idle before clearing old thinking blocks. Default 5. Use -1 to disable. */ + thinkingThresholdMinutes?: number; + /** Minutes idle before clearing old tool results. Default 60. Use -1 to disable. */ + toolResultsThresholdMinutes?: number; + /** Number of most-recent tool results to preserve. Default 5. */ + toolResultsNumToKeep?: number; +} + export interface TelemetrySettings { enabled?: boolean; target?: TelemetryTarget; @@ -371,8 +384,7 @@ export interface ConfigParameters { model?: string; outputLanguageFilePath?: string; maxSessionTurns?: number; - /** Minutes of inactivity before clearing retained thinking blocks. 
*/ - thinkingIdleThresholdMinutes?: number; + clearContextOnIdle?: ClearContextOnIdleSettings; sessionTokenLimit?: number; experimentalZedIntegration?: boolean; cronEnabled?: boolean; @@ -561,7 +573,7 @@ export class Config { private ideMode: boolean; private readonly maxSessionTurns: number; - private readonly thinkingIdleThresholdMs: number; + private readonly clearContextOnIdle: ClearContextOnIdleSettings; private readonly sessionTokenLimit: number; private readonly listExtensions: boolean; private readonly overrideExtensions?: string[]; @@ -688,8 +700,14 @@ export class Config { this.fileDiscoveryService = params.fileDiscoveryService ?? null; this.bugCommand = params.bugCommand; this.maxSessionTurns = params.maxSessionTurns ?? -1; - this.thinkingIdleThresholdMs = - (params.thinkingIdleThresholdMinutes ?? 5) * 60 * 1000; + this.clearContextOnIdle = { + thinkingThresholdMinutes: + params.clearContextOnIdle?.thinkingThresholdMinutes ?? 5, + toolResultsThresholdMinutes: + params.clearContextOnIdle?.toolResultsThresholdMinutes ?? 60, + toolResultsNumToKeep: + params.clearContextOnIdle?.toolResultsNumToKeep ?? 5, + }; this.sessionTokenLimit = params.sessionTokenLimit ?? -1; this.experimentalZedIntegration = params.experimentalZedIntegration ?? 
false; @@ -1336,8 +1354,8 @@ export class Config { return this.maxSessionTurns; } - getThinkingIdleThresholdMs(): number { - return this.thinkingIdleThresholdMs; + getClearContextOnIdle(): ClearContextOnIdleSettings { + return this.clearContextOnIdle; } getSessionTokenLimit(): number { diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 9f7ead5c67..746274d5dd 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -323,7 +323,11 @@ describe('Gemini Client (client.ts)', () => { getWorkingDir: vi.fn().mockReturnValue('/test/dir'), getFileService: vi.fn().mockReturnValue(fileService), getMaxSessionTurns: vi.fn().mockReturnValue(0), - getThinkingIdleThresholdMs: vi.fn().mockReturnValue(5 * 60 * 1000), + getClearContextOnIdle: vi.fn().mockReturnValue({ + thinkingThresholdMinutes: 5, + toolResultsThresholdMinutes: 60, + toolResultsNumToKeep: 5, + }), getSessionTokenLimit: vi.fn().mockReturnValue(32000), getNoBrowser: vi.fn().mockReturnValue(false), getUsageStatisticsEnabled: vi.fn().mockReturnValue(true), diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 13fc86aaa9..4fe6bd41cf 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -16,6 +16,7 @@ import type { // Config import { ApprovalMode, type Config } from '../config/config.js'; import { createDebugLogger } from '../utils/debugLogger.js'; +import { microcompactHistory } from '../services/microcompaction/microcompact.js'; const debugLogger = createDebugLogger('CLIENT'); @@ -559,15 +560,16 @@ export class GeminiClient { // record user message for session management this.config.getChatRecordingService()?.recordUserMessage(request); - // Thinking block cross-turn retention with idle cleanup: - // - Active session (< threshold idle): keep thinking blocks for reasoning coherence - // - Idle > threshold: clear old thinking, keep only last 1 turn to free context - // - 
Latch: once triggered, never revert — prevents oscillation + // Idle cleanup: clear stale thinking blocks after idle period. + // Latch: once triggered, never revert — prevents oscillation. + const idleConfig = this.config.getClearContextOnIdle(); + const thinkingThresholdMin = idleConfig.thinkingThresholdMinutes ?? 5; if ( + thinkingThresholdMin >= 0 && !this.thinkingClearLatched && this.lastApiCompletionTimestamp !== null ) { - const thresholdMs = this.config.getThinkingIdleThresholdMs(); + const thresholdMs = thinkingThresholdMin * 60 * 1000; const idleMs = Date.now() - this.lastApiCompletionTimestamp; if (idleMs > thresholdMs) { this.thinkingClearLatched = true; @@ -580,6 +582,25 @@ export class GeminiClient { this.getChat().stripThoughtsFromHistoryKeepRecent(1); debugLogger.debug('Stripped old thinking blocks (keeping last 1 turn)'); } + + // Idle cleanup: clear old tool results when idle > threshold. + // Runs on user and cron messages (not tool result submissions or + // retries/hooks) so that model latency during a tool-call loop + // doesn't count as user idle time. 
+ const mcResult = microcompactHistory( + this.getChat().getHistory(), + this.lastApiCompletionTimestamp, + this.config.getClearContextOnIdle(), + ); + if (mcResult.meta) { + this.getChat().setHistory(mcResult.history); + const m = mcResult.meta; + debugLogger.debug( + `[TIME-BASED MC] gap ${m.gapMinutes}min > ${m.thresholdMinutes}min, ` + + `cleared ${m.toolsCleared} tool results (~${m.tokensSaved} tokens), ` + + `kept last ${m.toolsKept}`, + ); + } } if (messageType !== SendMessageType.Retry) { this.sessionTurnCount++; diff --git a/packages/core/src/services/microcompaction/microcompact.test.ts b/packages/core/src/services/microcompaction/microcompact.test.ts new file mode 100644 index 0000000000..8f4ac75105 --- /dev/null +++ b/packages/core/src/services/microcompaction/microcompact.test.ts @@ -0,0 +1,392 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it, afterEach } from 'vitest'; +import type { Content } from '@google/genai'; +import type { ClearContextOnIdleSettings } from '../../config/config.js'; + +import { + evaluateTimeBasedTrigger, + microcompactHistory, + MICROCOMPACT_CLEARED_MESSAGE, +} from './microcompact.js'; + +function clearEnv() { + delete process.env['QWEN_MC_KEEP_RECENT']; +} + +function makeToolCall(name: string): Content { + return { + role: 'model', + parts: [{ functionCall: { name, args: {} } }], + }; +} + +function makeToolResult(name: string, output: string): Content { + return { + role: 'user', + parts: [{ functionResponse: { name, response: { output } } }], + }; +} + +function makeUserMessage(text: string): Content { + return { role: 'user', parts: [{ text }] }; +} + +function makeModelMessage(text: string): Content { + return { role: 'model', parts: [{ text }] }; +} + +const DEFAULT_SETTINGS: ClearContextOnIdleSettings = { + thinkingThresholdMinutes: 5, + toolResultsThresholdMinutes: 5, + toolResultsNumToKeep: 1, +}; + +describe('evaluateTimeBasedTrigger', 
() => { + it('should return null when disabled (-1)', () => { + const result = evaluateTimeBasedTrigger(Date.now() - 2 * 60 * 60 * 1000, { + ...DEFAULT_SETTINGS, + toolResultsThresholdMinutes: -1, + }); + expect(result).toBeNull(); + }); + + it('should return null when no prior API completion', () => { + const result = evaluateTimeBasedTrigger(null, DEFAULT_SETTINGS); + expect(result).toBeNull(); + }); + + it('should return null when gap is under threshold', () => { + const result = evaluateTimeBasedTrigger( + Date.now() - 1 * 60 * 1000, + DEFAULT_SETTINGS, + ); + expect(result).toBeNull(); + }); + + it('should fire when gap exceeds threshold', () => { + const result = evaluateTimeBasedTrigger( + Date.now() - 10 * 60 * 1000, + DEFAULT_SETTINGS, + ); + expect(result).not.toBeNull(); + expect(result!.gapMs).toBeGreaterThan(5 * 60 * 1000); + }); + + it('should respect custom threshold', () => { + const result = evaluateTimeBasedTrigger(Date.now() - 10 * 1000, { + ...DEFAULT_SETTINGS, + toolResultsThresholdMinutes: 0.1, + }); + expect(result).not.toBeNull(); + }); + + it('should return null for non-finite gap', () => { + const result = evaluateTimeBasedTrigger(NaN, DEFAULT_SETTINGS); + expect(result).toBeNull(); + }); +}); + +describe('microcompactHistory', () => { + afterEach(clearEnv); + + const twoHoursAgo = Date.now() - 2 * 60 * 60 * 1000; + + it('should return history unchanged when trigger does not fire', () => { + const history: Content[] = [ + makeUserMessage('hello'), + makeModelMessage('hi'), + ]; + const result = microcompactHistory(history, Date.now(), DEFAULT_SETTINGS); + expect(result.history).toBe(history); + expect(result.meta).toBeUndefined(); + }); + + it('should clear old compactable tool results and keep recent', () => { + const history: Content[] = [ + makeUserMessage('msg1'), + makeModelMessage('resp1'), + makeToolCall('read_file'), + makeToolResult('read_file', 'old file content that is very long'), + makeToolCall('read_file'), + 
makeToolResult('read_file', 'recent file content'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, DEFAULT_SETTINGS); + + expect(result.meta).toBeDefined(); + expect(result.meta!.toolsCleared).toBe(1); + expect(result.meta!.toolsKept).toBe(1); + + expect( + result.history[3]!.parts![0]!.functionResponse!.response!['output'], + ).toBe(MICROCOMPACT_CLEARED_MESSAGE); + expect( + result.history[5]!.parts![0]!.functionResponse!.response!['output'], + ).toBe('recent file content'); + }); + + it('should not clear non-compactable tools', () => { + const history: Content[] = [ + makeToolCall('ask_user_question'), + makeToolResult('ask_user_question', 'user answer'), + makeToolCall('read_file'), + makeToolResult('read_file', 'file content'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, { + ...DEFAULT_SETTINGS, + toolResultsNumToKeep: 0, + }); + + expect( + result.history[1]!.parts![0]!.functionResponse!.response!['output'], + ).toBe('user answer'); + // keepRecent floored to 1 — only 1 compactable, so it's kept + expect(result.meta).toBeUndefined(); + }); + + it('should skip already-cleared results', () => { + const history: Content[] = [ + makeToolCall('read_file'), + makeToolResult('read_file', MICROCOMPACT_CLEARED_MESSAGE), + makeToolCall('read_file'), + makeToolResult('read_file', 'new content'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, DEFAULT_SETTINGS); + expect(result.meta).toBeUndefined(); + }); + + it('should handle keepRecent > compactable count (no-op)', () => { + const history: Content[] = [ + makeToolCall('read_file'), + makeToolResult('read_file', 'only result'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, { + ...DEFAULT_SETTINGS, + toolResultsNumToKeep: 5, + }); + + expect(result.meta).toBeUndefined(); + expect( + result.history[1]!.parts![0]!.functionResponse!.response!['output'], + ).toBe('only result'); + }); + + it('should floor keepRecent to 1', () => { + const history: 
Content[] = [ + makeToolCall('read_file'), + makeToolResult('read_file', 'old content'), + makeToolCall('grep_search'), + makeToolResult('grep_search', 'grep results'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, { + ...DEFAULT_SETTINGS, + toolResultsNumToKeep: 0, + }); + + expect(result.meta).toBeDefined(); + expect(result.meta!.toolsCleared).toBe(1); + expect(result.meta!.toolsKept).toBe(1); + + expect( + result.history[1]!.parts![0]!.functionResponse!.response!['output'], + ).toBe(MICROCOMPACT_CLEARED_MESSAGE); + expect( + result.history[3]!.parts![0]!.functionResponse!.response!['output'], + ).toBe('grep results'); + }); + + it('should preserve non-functionResponse parts in cleared Content', () => { + const history: Content[] = [ + { + role: 'user', + parts: [ + { text: 'some text' }, + { + functionResponse: { + name: 'read_file', + response: { output: 'file content' }, + }, + }, + ], + }, + makeToolCall('read_file'), + makeToolResult('read_file', 'recent'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, DEFAULT_SETTINGS); + + expect(result.meta).toBeDefined(); + expect(result.history[0]!.parts![0]!.text).toBe('some text'); + expect( + result.history[0]!.parts![1]!.functionResponse!.response!['output'], + ).toBe(MICROCOMPACT_CLEARED_MESSAGE); + }); + + it('should preserve functionResponse name after clearing', () => { + const history: Content[] = [ + makeToolCall('read_file'), + makeToolResult('read_file', 'content'), + makeToolCall('read_file'), + makeToolResult('read_file', 'recent'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, DEFAULT_SETTINGS); + + expect(result.history[1]!.parts![0]!.functionResponse!.name).toBe( + 'read_file', + ); + }); + + it('should count per-part not per-Content for batched tool results', () => { + const history: Content[] = [ + { + role: 'model', + parts: [ + { functionCall: { name: 'read_file', args: {} } }, + { functionCall: { name: 'read_file', args: {} } }, + { 
functionCall: { name: 'read_file', args: {} } }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read_file', + response: { output: 'file-a' }, + }, + }, + { + functionResponse: { + name: 'read_file', + response: { output: 'file-b' }, + }, + }, + { + functionResponse: { + name: 'read_file', + response: { output: 'file-c' }, + }, + }, + ], + }, + ]; + + const result = microcompactHistory(history, twoHoursAgo, DEFAULT_SETTINGS); + + expect(result.meta).toBeDefined(); + expect(result.meta!.toolsCleared).toBe(2); + expect(result.meta!.toolsKept).toBe(1); + + const parts = result.history[1]!.parts!; + expect(parts[0]!.functionResponse!.response!['output']).toBe( + MICROCOMPACT_CLEARED_MESSAGE, + ); + expect(parts[1]!.functionResponse!.response!['output']).toBe( + MICROCOMPACT_CLEARED_MESSAGE, + ); + expect(parts[2]!.functionResponse!.response!['output']).toBe('file-c'); + }); + + it('should handle mixed batched and separate tool results', () => { + const history: Content[] = [ + makeToolCall('read_file'), + makeToolResult('read_file', 'old-single'), + { + role: 'model', + parts: [ + { functionCall: { name: 'read_file', args: {} } }, + { functionCall: { name: 'grep_search', args: {} } }, + ], + }, + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read_file', + response: { output: 'batched-read' }, + }, + }, + { + functionResponse: { + name: 'grep_search', + response: { output: 'batched-grep' }, + }, + }, + ], + }, + ]; + + const result = microcompactHistory(history, twoHoursAgo, { + ...DEFAULT_SETTINGS, + toolResultsNumToKeep: 2, + }); + + expect(result.meta).toBeDefined(); + expect(result.meta!.toolsCleared).toBe(1); + expect(result.meta!.toolsKept).toBe(2); + + expect( + result.history[1]!.parts![0]!.functionResponse!.response!['output'], + ).toBe(MICROCOMPACT_CLEARED_MESSAGE); + expect( + result.history[3]!.parts![0]!.functionResponse!.response!['output'], + ).toBe('batched-read'); + expect( + 
result.history[3]!.parts![1]!.functionResponse!.response!['output'], + ).toBe('batched-grep'); + }); + + it('should not clear tool error responses', () => { + const history: Content[] = [ + makeToolCall('read_file'), + { + role: 'user', + parts: [ + { + functionResponse: { + name: 'read_file', + response: { error: 'File not found: /missing.txt' }, + }, + }, + ], + }, + makeToolCall('read_file'), + makeToolResult('read_file', 'recent content'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, DEFAULT_SETTINGS); + + expect( + result.history[1]!.parts![0]!.functionResponse!.response!['error'], + ).toBe('File not found: /missing.txt'); + expect( + result.history[1]!.parts![0]!.functionResponse!.response!['output'], + ).toBeUndefined(); + }); + + it('should estimate tokens saved', () => { + const longContent = 'x'.repeat(400); + const history: Content[] = [ + makeToolCall('read_file'), + makeToolResult('read_file', longContent), + makeToolCall('read_file'), + makeToolResult('read_file', 'recent'), + ]; + + const result = microcompactHistory(history, twoHoursAgo, DEFAULT_SETTINGS); + + expect(result.meta).toBeDefined(); + expect(result.meta!.tokensSaved).toBe(100); + }); +}); diff --git a/packages/core/src/services/microcompaction/microcompact.ts b/packages/core/src/services/microcompaction/microcompact.ts new file mode 100644 index 0000000000..c40653e5e7 --- /dev/null +++ b/packages/core/src/services/microcompaction/microcompact.ts @@ -0,0 +1,218 @@ +/** + * @license + * Copyright 2025 Google LLC + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Content, Part } from '@google/genai'; + +import type { ClearContextOnIdleSettings } from '../../config/config.js'; +import { ToolNames } from '../../tools/tool-names.js'; + +export const MICROCOMPACT_CLEARED_MESSAGE = '[Old tool result content cleared]'; + +const COMPACTABLE_TOOLS = new Set([ + ToolNames.READ_FILE, + ToolNames.SHELL, + ToolNames.GREP, + ToolNames.GLOB, + ToolNames.WEB_FETCH, + 
ToolNames.WEB_SEARCH, + ToolNames.EDIT, + ToolNames.WRITE_FILE, +]); + +// --- Trigger evaluation --- + +/** + * Check whether the time-based trigger should fire: returns `{ gapMs }` once the time elapsed since lastApiCompletionTimestamp has reached toolResultsThresholdMinutes (default 60), otherwise null. + * + * A negative (conventionally -1) or NaN toolResultsThresholdMinutes, or a null timestamp, means disabled (never clear). + */ +export function evaluateTimeBasedTrigger( + lastApiCompletionTimestamp: number | null, + settings: ClearContextOnIdleSettings, +): { gapMs: number } | null { + const thresholdMin = settings.toolResultsThresholdMinutes ?? 60; + // Negative (-1) means disabled. Written as a negated >= so a NaN/malformed threshold is also treated as disabled: `gapMs < NaN` is always false and would otherwise fire on every message. + if (!(thresholdMin >= 0)) { + return null; + } + if (lastApiCompletionTimestamp === null) { + return null; + } + const thresholdMs = thresholdMin * 60_000; + const gapMs = Date.now() - lastApiCompletionTimestamp; + if (!Number.isFinite(gapMs) || gapMs < thresholdMs) { + return null; + } + return { gapMs }; +} + +// --- Collection --- + +/** Pointer to a single compactable functionResponse part. */ +interface PartRef { + contentIndex: number; + partIndex: number; +} + +/** + * Collect references to individual compactable functionResponse parts + * across the history, in encounter order. This counts per-part (not + * per-Content-entry) so keepRecent applies to individual tool results + * even when multiple results are batched into one Content message. + */ +function collectCompactablePartRefs(history: Content[]): PartRef[] { + const refs: PartRef[] = []; + for (let ci = 0; ci < history.length; ci++) { + const content = history[ci]!; + if (content.role !== 'user' || !content.parts) continue; + for (let pi = 0; pi < content.parts.length; pi++) { + const part = content.parts[pi]!; + if ( + part.functionResponse?.name && + COMPACTABLE_TOOLS.has(part.functionResponse.name) + ) { + refs.push({ contentIndex: ci, partIndex: pi }); + } + } + } + return refs; +} + +// --- Helpers --- + +/** True when the functionResponse carries an error (not a success output). 
*/ +function isErrorResponse(part: Part): boolean { + return part.functionResponse?.response?.['error'] !== undefined; +} + +function estimatePartTokens(part: Part): number { + if (!part.functionResponse?.response) return 0; + const output = part.functionResponse.response['output']; + if (typeof output !== 'string') return 0; + return Math.ceil(output.length / 4); +} + +function isAlreadyCleared(part: Part): boolean { + return ( + part.functionResponse?.response?.['output'] === MICROCOMPACT_CLEARED_MESSAGE + ); +} + +// --- Main entry point --- + +export interface MicrocompactMeta { + gapMinutes: number; + thresholdMinutes: number; + toolsCleared: number; + toolsKept: number; + keepRecent: number; + tokensSaved: number; +} + +/** + * Microcompact history: when the time-based trigger fires, replace the response of every compactable tool result except the most recent N with MICROCOMPACT_CLEARED_MESSAGE (error responses and already-cleared results are left untouched). + * N is settings.toolResultsNumToKeep (default 5), overridden by a non-blank numeric QWEN_MC_KEEP_RECENT env var; it is truncated to an integer and floored at 1. + * + * Returns a new history array plus metadata about what was cleared (for logging by the caller). + * When the trigger does not fire or nothing was cleared, the input array is returned as-is and meta is omitted. + */ +export function microcompactHistory( + history: Content[], + lastApiCompletionTimestamp: number | null, + settings: ClearContextOnIdleSettings, +): { history: Content[]; meta?: MicrocompactMeta } { + const trigger = evaluateTimeBasedTrigger( + lastApiCompletionTimestamp, + settings, + ); + if (!trigger) { + return { history }; + } + const { gapMs } = trigger; + + const envKeep = process.env['QWEN_MC_KEEP_RECENT']; + const rawKeepRecent = + (envKeep ?? '').trim() !== '' && Number.isFinite(Number(envKeep)) + ? Number(envKeep) + : (settings.toolResultsNumToKeep ?? 5); + const keepRecent = Number.isFinite(rawKeepRecent) + ? 
Math.max(1, Math.trunc(rawKeepRecent)) + : 5; + + const allRefs = collectCompactablePartRefs(history); + const keepRefs = new Set( + allRefs.slice(-keepRecent).map((r) => `${r.contentIndex}:${r.partIndex}`), + ); + const clearRefs = allRefs.filter( + (r) => !keepRefs.has(`${r.contentIndex}:${r.partIndex}`), + ); + + if (clearRefs.length === 0) { + return { history }; + } + + // Build a lookup: contentIndex → Set of partIndices to clear + const clearMap = new Map<number, Set<number>>(); + for (const ref of clearRefs) { + let parts = clearMap.get(ref.contentIndex); + if (!parts) { + parts = new Set<number>(); + clearMap.set(ref.contentIndex, parts); + } + parts.add(ref.partIndex); + } + + let tokensSaved = 0; + let toolsCleared = 0; + + const result: Content[] = history.map((content, ci) => { + const partsToClean = clearMap.get(ci); + if (!partsToClean || !content.parts) return content; + + let touched = false; + const newParts = content.parts.map((part, pi) => { + if ( + partsToClean.has(pi) && + part.functionResponse?.name && + COMPACTABLE_TOOLS.has(part.functionResponse.name) && + !isAlreadyCleared(part) && + !isErrorResponse(part) + ) { + tokensSaved += estimatePartTokens(part); + toolsCleared++; + touched = true; + return { + functionResponse: { + ...part.functionResponse, + response: { output: MICROCOMPACT_CLEARED_MESSAGE }, + }, + }; + } + return part; + }); + + if (!touched) return content; + return { ...content, parts: newParts }; + }); + // Guard on the number of parts cleared, not the token estimate: estimatePartTokens() is 0 for empty or non-string outputs, and those replacements must not be discarded. + if (toolsCleared === 0) { + return { history }; + } + + const thresholdMinutes = settings.toolResultsThresholdMinutes ?? 
60; + + return { + history: result, + meta: { + gapMinutes: Math.round(gapMs / 60_000), + thresholdMinutes, + toolsCleared, + toolsKept: allRefs.length - clearRefs.length, + keepRecent, + tokensSaved, + }, + }; +} diff --git a/packages/vscode-ide-companion/schemas/settings.schema.json b/packages/vscode-ide-companion/schemas/settings.schema.json index 39a89eec1e..59a395d54b 100644 --- a/packages/vscode-ide-companion/schemas/settings.schema.json +++ b/packages/vscode-ide-companion/schemas/settings.schema.json @@ -363,6 +363,27 @@ "type": "boolean", "default": false }, + "clearContextOnIdle": { + "description": "Settings for clearing stale context after idle periods. Use -1 to disable a threshold.", + "type": "object", + "properties": { + "thinkingThresholdMinutes": { + "description": "Minutes of inactivity before clearing old thinking blocks. Use -1 to disable.", + "type": "number", + "default": 5 + }, + "toolResultsThresholdMinutes": { + "description": "Minutes of inactivity before clearing old tool result content. Use -1 to disable.", + "type": "number", + "default": 60 + }, + "toolResultsNumToKeep": { + "description": "Number of most-recent compactable tool results to preserve when clearing. Floor at 1.", + "type": "number", + "default": 5 + } + } + }, "fileFiltering": { "description": "Settings for git-aware file filtering.", "type": "object", @@ -388,11 +409,6 @@ "default": true } } - }, - "gapThresholdMinutes": { - "description": "Minutes of inactivity after which retained thinking blocks are cleared to free context tokens. Aligns with provider prompt-cache TTL.", - "type": "number", - "default": 5 } } },