From 6e34b3102b63450c35acbe7cb979bfc1257d5bc4 Mon Sep 17 00:00:00 2001 From: netbrah <162479981+netbrah@users.noreply.github.com> Date: Fri, 13 Mar 2026 11:39:17 -0400 Subject: [PATCH 1/3] fix: auto-detect max_tokens from model when not set by provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When modelProviders config does not specify samplingParams.max_tokens, requests to non-Qwen models (Claude, GPT, Gemini, etc.) omit max_tokens entirely. Many APIs default to a small value (e.g., Anthropic via VertexAI defaults to 4096), causing long responses to be truncated mid-generation — often breaking tool call parameters. Fix: apply tokenLimit(model.id, 'output') as a fallback in applyResolvedModelDefaults(), following the same pattern already used for contextWindowSize and modalities auto-detection. Output limits from tokenLimits.ts: - Claude Opus 4.6: 128K - Claude Sonnet 4.6 / fallback: 64K - GPT-5.x: 128K - Gemini 3.x: 64K - Qwen 3.5: 64K Made-with: Cursor --- packages/core/src/models/modelsConfig.ts | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/core/src/models/modelsConfig.ts b/packages/core/src/models/modelsConfig.ts index d22cc790cb..d9cb50c96c 100644 --- a/packages/core/src/models/modelsConfig.ts +++ b/packages/core/src/models/modelsConfig.ts @@ -772,6 +772,22 @@ export class ModelsConfig { }; } + // max_tokens fallback: auto-detect from model when not set by provider. + // Without this, requests to non-Qwen models (Claude, GPT, etc.) may omit + // max_tokens entirely, causing the API to use a small default (e.g. 4096) + // and truncating long responses mid-tool-call. 
+ if (!this._generationConfig.samplingParams?.max_tokens) { + const outputLimit = tokenLimit(model.id, 'output'); + if (!this._generationConfig.samplingParams) { + this._generationConfig.samplingParams = {}; + } + this._generationConfig.samplingParams.max_tokens = outputLimit; + this.generationConfigSources['samplingParams'] = { + kind: 'computed', + detail: 'max_tokens auto-detected from model', + }; + } + // modalities fallback: auto-detect from model when not set by provider if (gc.modalities === undefined) { this._generationConfig.modalities = defaultModalities(model.id); From 7f0942066b3a981d6d1de5286471f7e254cc5003 Mon Sep 17 00:00:00 2001 From: "mingholy.lmh" Date: Mon, 16 Mar 2026 17:00:00 +0800 Subject: [PATCH 2/3] fix(models): improve max_tokens auto-detection source tracking and add tests Co-authored-by: Qwen-Coder - Fix generationConfigSources to preserve existing source info when auto-detecting max_tokens - Add unit tests for max_tokens fallback logic --- packages/core/src/models/modelsConfig.test.ts | 139 ++++++++++++++++++ packages/core/src/models/modelsConfig.ts | 5 +- 2 files changed, 143 insertions(+), 1 deletion(-) diff --git a/packages/core/src/models/modelsConfig.test.ts b/packages/core/src/models/modelsConfig.test.ts index 25268aebe5..004acb2301 100644 --- a/packages/core/src/models/modelsConfig.test.ts +++ b/packages/core/src/models/modelsConfig.test.ts @@ -1506,4 +1506,143 @@ describe('ModelsConfig', () => { expect(allModels.some((m) => m.id === 'gemini-ultra')).toBe(true); }); }); + + describe('max_tokens fallback', () => { + it('should auto-detect max_tokens when samplingParams is undefined', async () => { + const modelProvidersConfig: ModelProvidersConfig = { + openai: [ + { + id: 'gpt-4', + name: 'GPT-4', + baseUrl: 'https://api.openai.example.com/v1', + // No generationConfig.samplingParams defined + }, + ], + }; + + const modelsConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_OPENAI, + modelProvidersConfig, + }); + + await 
modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); + + const gc = currentGenerationConfig(modelsConfig); + // GPT-4 output limit is 16K per tokenLimits.ts + expect(gc.samplingParams?.max_tokens).toBe(16384); + expect(gc.samplingParams?.temperature).toBeUndefined(); + + const sources = modelsConfig.getGenerationConfigSources(); + expect(sources['samplingParams']?.kind).toBe('computed'); + // Even when samplingParams is not explicitly defined in provider config, + // the field is still tracked as from modelProviders, so the detail reflects that + expect(sources['samplingParams']?.detail).toBe( + 'max_tokens auto-detected from model (other params from modelProviders)', + ); + }); + + it('should auto-detect max_tokens when samplingParams exists but max_tokens is missing', async () => { + const modelProvidersConfig: ModelProvidersConfig = { + openai: [ + { + id: 'gpt-4', + name: 'GPT-4', + baseUrl: 'https://api.openai.example.com/v1', + generationConfig: { + samplingParams: { temperature: 0.7 }, // max_tokens not defined + }, + }, + ], + }; + + const modelsConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_OPENAI, + modelProvidersConfig, + }); + + await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); + + const gc = currentGenerationConfig(modelsConfig); + // Should preserve temperature from provider and add max_tokens + expect(gc.samplingParams?.temperature).toBe(0.7); + expect(gc.samplingParams?.max_tokens).toBe(16384); + + const sources = modelsConfig.getGenerationConfigSources(); + expect(sources['samplingParams']?.kind).toBe('computed'); + expect(sources['samplingParams']?.detail).toBe( + 'max_tokens auto-detected from model (other params from modelProviders)', + ); + }); + + it('should not override existing max_tokens from modelProviders', async () => { + const modelProvidersConfig: ModelProvidersConfig = { + openai: [ + { + id: 'gpt-4', + name: 'GPT-4', + baseUrl: 'https://api.openai.example.com/v1', + generationConfig: { + samplingParams: { 
temperature: 0.7, max_tokens: 4096 }, + }, + }, + ], + }; + + const modelsConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_OPENAI, + modelProvidersConfig, + }); + + await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4'); + + const gc = currentGenerationConfig(modelsConfig); + // Should preserve both values from provider + expect(gc.samplingParams?.temperature).toBe(0.7); + expect(gc.samplingParams?.max_tokens).toBe(4096); + + const sources = modelsConfig.getGenerationConfigSources(); + expect(sources['samplingParams']?.kind).toBe('modelProviders'); + }); + + it('should use correct output limit for different model families', async () => { + const modelProvidersConfig: ModelProvidersConfig = { + anthropic: [ + { + id: 'claude-3-opus', + name: 'Claude 3 Opus', + baseUrl: 'https://api.anthropic.example.com/v1', + }, + ], + gemini: [ + { + id: 'gemini-pro', + name: 'Gemini Pro', + baseUrl: 'https://api.gemini.example.com/v1', + }, + ], + }; + + // Test Claude model (64K output limit) + const claudeConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_ANTHROPIC, + modelProvidersConfig, + }); + + await claudeConfig.switchModel(AuthType.USE_ANTHROPIC, 'claude-3-opus'); + + let gc = currentGenerationConfig(claudeConfig); + expect(gc.samplingParams?.max_tokens).toBe(65536); // 64K = 2^16 + + // Test Gemini model (8K output limit) + const geminiConfig = new ModelsConfig({ + initialAuthType: AuthType.USE_GEMINI, + modelProvidersConfig, + }); + + await geminiConfig.switchModel(AuthType.USE_GEMINI, 'gemini-pro'); + + gc = currentGenerationConfig(geminiConfig); + expect(gc.samplingParams?.max_tokens).toBe(8192); + }); + }); }); diff --git a/packages/core/src/models/modelsConfig.ts b/packages/core/src/models/modelsConfig.ts index d9cb50c96c..d9749bb961 100644 --- a/packages/core/src/models/modelsConfig.ts +++ b/packages/core/src/models/modelsConfig.ts @@ -782,9 +782,12 @@ export class ModelsConfig { this._generationConfig.samplingParams = {}; } 
this._generationConfig.samplingParams.max_tokens = outputLimit; + const existingSource = this.generationConfigSources['samplingParams']; this.generationConfigSources['samplingParams'] = { kind: 'computed', - detail: 'max_tokens auto-detected from model', + detail: existingSource + ? `max_tokens auto-detected from model (other params from ${existingSource.kind})` + : 'max_tokens auto-detected from model', }; } From 6f67b12446be6950a46658afbaf562063dc16ef8 Mon Sep 17 00:00:00 2001 From: "mingholy.lmh" Date: Mon, 16 Mar 2026 17:21:32 +0800 Subject: [PATCH 3/3] fix: lint error --- packages/core/src/services/shellExecutionService.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts index 2c90c6e717..823b947f2e 100644 --- a/packages/core/src/services/shellExecutionService.test.ts +++ b/packages/core/src/services/shellExecutionService.test.ts @@ -124,8 +124,8 @@ const setupConflictingPathEnv = () => { }; const expectNormalizedWindowsPathEnv = (env: NodeJS.ProcessEnv) => { - expect(env.PATH).toBe(EXPECTED_MERGED_WINDOWS_PATH); - expect(env.Path).toBeUndefined(); + expect(env['PATH']).toBe(EXPECTED_MERGED_WINDOWS_PATH); + expect(env['Path']).toBeUndefined(); }; describe('ShellExecutionService', () => {