From 6e34b3102b63450c35acbe7cb979bfc1257d5bc4 Mon Sep 17 00:00:00 2001
From: netbrah <162479981+netbrah@users.noreply.github.com>
Date: Fri, 13 Mar 2026 11:39:17 -0400
Subject: [PATCH 1/3] fix: auto-detect max_tokens from model when not set by
 provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When modelProviders config does not specify samplingParams.max_tokens,
requests to non-Qwen models (Claude, GPT, Gemini, etc.) omit max_tokens
entirely. Many APIs default to a small value (e.g., Anthropic via
VertexAI defaults to 4096), causing long responses to be truncated
mid-generation — often breaking tool call parameters.

Fix: apply tokenLimit(model.id, 'output') as a fallback in
applyResolvedModelDefaults(), following the same pattern already used
for contextWindowSize and modalities auto-detection.

Output limits from tokenLimits.ts:
  - Claude Opus 4.6: 128K
  - Claude Sonnet 4.6 / fallback: 64K
  - GPT-5.x: 128K
  - Gemini 3.x: 64K
  - Qwen 3.5: 64K

Made-with: Cursor
---
 packages/core/src/models/modelsConfig.ts | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/packages/core/src/models/modelsConfig.ts b/packages/core/src/models/modelsConfig.ts
index d22cc790cb..d9cb50c96c 100644
--- a/packages/core/src/models/modelsConfig.ts
+++ b/packages/core/src/models/modelsConfig.ts
@@ -772,6 +772,22 @@ export class ModelsConfig {
       };
     }
 
+    // max_tokens fallback: auto-detect from model when not set by provider.
+    // Without this, requests to non-Qwen models (Claude, GPT, etc.) may omit
+    // max_tokens entirely, causing the API to use a small default (e.g. 4096)
+    // and truncating long responses mid-tool-call.
+    if (!this._generationConfig.samplingParams?.max_tokens) {
+      const outputLimit = tokenLimit(model.id, 'output');
+      if (!this._generationConfig.samplingParams) {
+        this._generationConfig.samplingParams = {};
+      }
+      this._generationConfig.samplingParams.max_tokens = outputLimit;
+      this.generationConfigSources['samplingParams'] = {
+        kind: 'computed',
+        detail: 'max_tokens auto-detected from model',
+      };
+    }
+
     // modalities fallback: auto-detect from model when not set by provider
     if (gc.modalities === undefined) {
       this._generationConfig.modalities = defaultModalities(model.id);

From 7f0942066b3a981d6d1de5286471f7e254cc5003 Mon Sep 17 00:00:00 2001
From: "mingholy.lmh" <mingholy.lmh@alibaba-inc.com>
Date: Mon, 16 Mar 2026 17:00:00 +0800
Subject: [PATCH 2/3] fix(models): improve max_tokens auto-detection source
 tracking and add tests

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>

- Fix generationConfigSources to preserve existing source info when auto-detecting max_tokens

- Add unit tests for max_tokens fallback logic
---
 packages/core/src/models/modelsConfig.test.ts | 139 ++++++++++++++++++
 packages/core/src/models/modelsConfig.ts      |   5 +-
 2 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/packages/core/src/models/modelsConfig.test.ts b/packages/core/src/models/modelsConfig.test.ts
index 25268aebe5..004acb2301 100644
--- a/packages/core/src/models/modelsConfig.test.ts
+++ b/packages/core/src/models/modelsConfig.test.ts
@@ -1506,4 +1506,143 @@ describe('ModelsConfig', () => {
       expect(allModels.some((m) => m.id === 'gemini-ultra')).toBe(true);
     });
   });
+
+  describe('max_tokens fallback', () => {
+    it('should auto-detect max_tokens when samplingParams is undefined', async () => {
+      const modelProvidersConfig: ModelProvidersConfig = {
+        openai: [
+          {
+            id: 'gpt-4',
+            name: 'GPT-4',
+            baseUrl: 'https://api.openai.example.com/v1',
+            // No generationConfig.samplingParams defined
+          },
+        ],
+      };
+
+      const modelsConfig = new ModelsConfig({
+        initialAuthType: AuthType.USE_OPENAI,
+        modelProvidersConfig,
+      });
+
+      await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4');
+
+      const gc = currentGenerationConfig(modelsConfig);
+      // GPT-4 output limit is 16K per tokenLimits.ts
+      expect(gc.samplingParams?.max_tokens).toBe(16384);
+      expect(gc.samplingParams?.temperature).toBeUndefined();
+
+      const sources = modelsConfig.getGenerationConfigSources();
+      expect(sources['samplingParams']?.kind).toBe('computed');
+      // Even when the provider config defines no samplingParams, the field's
+      // source is still recorded as modelProviders, so the detail mentions it
+      expect(sources['samplingParams']?.detail).toBe(
+        'max_tokens auto-detected from model (other params from modelProviders)',
+      );
+    });
+
+    it('should auto-detect max_tokens when samplingParams exists but max_tokens is missing', async () => {
+      const modelProvidersConfig: ModelProvidersConfig = {
+        openai: [
+          {
+            id: 'gpt-4',
+            name: 'GPT-4',
+            baseUrl: 'https://api.openai.example.com/v1',
+            generationConfig: {
+              samplingParams: { temperature: 0.7 }, // max_tokens not defined
+            },
+          },
+        ],
+      };
+
+      const modelsConfig = new ModelsConfig({
+        initialAuthType: AuthType.USE_OPENAI,
+        modelProvidersConfig,
+      });
+
+      await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4');
+
+      const gc = currentGenerationConfig(modelsConfig);
+      // Should preserve temperature from provider and add max_tokens
+      expect(gc.samplingParams?.temperature).toBe(0.7);
+      expect(gc.samplingParams?.max_tokens).toBe(16384);
+
+      const sources = modelsConfig.getGenerationConfigSources();
+      expect(sources['samplingParams']?.kind).toBe('computed');
+      expect(sources['samplingParams']?.detail).toBe(
+        'max_tokens auto-detected from model (other params from modelProviders)',
+      );
+    });
+
+    it('should not override existing max_tokens from modelProviders', async () => {
+      const modelProvidersConfig: ModelProvidersConfig = {
+        openai: [
+          {
+            id: 'gpt-4',
+            name: 'GPT-4',
+            baseUrl: 'https://api.openai.example.com/v1',
+            generationConfig: {
+              samplingParams: { temperature: 0.7, max_tokens: 4096 },
+            },
+          },
+        ],
+      };
+
+      const modelsConfig = new ModelsConfig({
+        initialAuthType: AuthType.USE_OPENAI,
+        modelProvidersConfig,
+      });
+
+      await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4');
+
+      const gc = currentGenerationConfig(modelsConfig);
+      // Should preserve both values from provider
+      expect(gc.samplingParams?.temperature).toBe(0.7);
+      expect(gc.samplingParams?.max_tokens).toBe(4096);
+
+      const sources = modelsConfig.getGenerationConfigSources();
+      expect(sources['samplingParams']?.kind).toBe('modelProviders');
+    });
+
+    it('should use correct output limit for different model families', async () => {
+      const modelProvidersConfig: ModelProvidersConfig = {
+        anthropic: [
+          {
+            id: 'claude-3-opus',
+            name: 'Claude 3 Opus',
+            baseUrl: 'https://api.anthropic.example.com/v1',
+          },
+        ],
+        gemini: [
+          {
+            id: 'gemini-pro',
+            name: 'Gemini Pro',
+            baseUrl: 'https://api.gemini.example.com/v1',
+          },
+        ],
+      };
+
+      // Test Claude model (64K output limit)
+      const claudeConfig = new ModelsConfig({
+        initialAuthType: AuthType.USE_ANTHROPIC,
+        modelProvidersConfig,
+      });
+
+      await claudeConfig.switchModel(AuthType.USE_ANTHROPIC, 'claude-3-opus');
+
+      let gc = currentGenerationConfig(claudeConfig);
+      expect(gc.samplingParams?.max_tokens).toBe(65536); // 64K = 2^16
+
+      // Test Gemini model (8K output limit)
+      const geminiConfig = new ModelsConfig({
+        initialAuthType: AuthType.USE_GEMINI,
+        modelProvidersConfig,
+      });
+
+      await geminiConfig.switchModel(AuthType.USE_GEMINI, 'gemini-pro');
+
+      gc = currentGenerationConfig(geminiConfig);
+      expect(gc.samplingParams?.max_tokens).toBe(8192);
+    });
+  });
 });
diff --git a/packages/core/src/models/modelsConfig.ts b/packages/core/src/models/modelsConfig.ts
index d9cb50c96c..d9749bb961 100644
--- a/packages/core/src/models/modelsConfig.ts
+++ b/packages/core/src/models/modelsConfig.ts
@@ -782,9 +782,12 @@ export class ModelsConfig {
         this._generationConfig.samplingParams = {};
       }
       this._generationConfig.samplingParams.max_tokens = outputLimit;
+      const existingSource = this.generationConfigSources['samplingParams'];
       this.generationConfigSources['samplingParams'] = {
         kind: 'computed',
-        detail: 'max_tokens auto-detected from model',
+        detail: existingSource
+          ? `max_tokens auto-detected from model (other params from ${existingSource.kind})`
+          : 'max_tokens auto-detected from model',
       };
     }
 

From 6f67b12446be6950a46658afbaf562063dc16ef8 Mon Sep 17 00:00:00 2001
From: "mingholy.lmh" <mingholy.lmh@alibaba-inc.com>
Date: Mon, 16 Mar 2026 17:21:32 +0800
Subject: [PATCH 3/3] fix: use bracket notation for env keys to satisfy lint

---
 packages/core/src/services/shellExecutionService.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/core/src/services/shellExecutionService.test.ts b/packages/core/src/services/shellExecutionService.test.ts
index 2c90c6e717..823b947f2e 100644
--- a/packages/core/src/services/shellExecutionService.test.ts
+++ b/packages/core/src/services/shellExecutionService.test.ts
@@ -124,8 +124,8 @@ const setupConflictingPathEnv = () => {
 };
 
 const expectNormalizedWindowsPathEnv = (env: NodeJS.ProcessEnv) => {
-  expect(env.PATH).toBe(EXPECTED_MERGED_WINDOWS_PATH);
-  expect(env.Path).toBeUndefined();
+  expect(env['PATH']).toBe(EXPECTED_MERGED_WINDOWS_PATH);
+  expect(env['Path']).toBeUndefined();
 };
 
 describe('ShellExecutionService', () => {