139 changes: 139 additions & 0 deletions packages/core/src/models/modelsConfig.test.ts
@@ -1506,4 +1506,143 @@ describe('ModelsConfig', () => {
expect(allModels.some((m) => m.id === 'gemini-ultra')).toBe(true);
});
});

describe('max_tokens fallback', () => {
it('should auto-detect max_tokens when samplingParams is undefined', async () => {
const modelProvidersConfig: ModelProvidersConfig = {
openai: [
{
id: 'gpt-4',
name: 'GPT-4',
baseUrl: 'https://api.openai.example.com/v1',
// No generationConfig.samplingParams defined
},
],
};

const modelsConfig = new ModelsConfig({
initialAuthType: AuthType.USE_OPENAI,
modelProvidersConfig,
});

await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4');

const gc = currentGenerationConfig(modelsConfig);
// GPT-4 output limit is 16K per tokenLimits.ts
expect(gc.samplingParams?.max_tokens).toBe(16384);
expect(gc.samplingParams?.temperature).toBeUndefined();

const sources = modelsConfig.getGenerationConfigSources();
expect(sources['samplingParams']?.kind).toBe('computed');
// Even when samplingParams is not explicitly defined in provider config,
// the field is still tracked as from modelProviders, so the detail reflects that
expect(sources['samplingParams']?.detail).toBe(
'max_tokens auto-detected from model (other params from modelProviders)',
);
});

it('should auto-detect max_tokens when samplingParams exists but max_tokens is missing', async () => {
const modelProvidersConfig: ModelProvidersConfig = {
openai: [
{
id: 'gpt-4',
name: 'GPT-4',
baseUrl: 'https://api.openai.example.com/v1',
generationConfig: {
samplingParams: { temperature: 0.7 }, // max_tokens not defined
},
},
],
};

const modelsConfig = new ModelsConfig({
initialAuthType: AuthType.USE_OPENAI,
modelProvidersConfig,
});

await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4');

const gc = currentGenerationConfig(modelsConfig);
// Should preserve temperature from provider and add max_tokens
expect(gc.samplingParams?.temperature).toBe(0.7);
expect(gc.samplingParams?.max_tokens).toBe(16384);

const sources = modelsConfig.getGenerationConfigSources();
expect(sources['samplingParams']?.kind).toBe('computed');
expect(sources['samplingParams']?.detail).toBe(
'max_tokens auto-detected from model (other params from modelProviders)',
);
});

it('should not override existing max_tokens from modelProviders', async () => {
const modelProvidersConfig: ModelProvidersConfig = {
openai: [
{
id: 'gpt-4',
name: 'GPT-4',
baseUrl: 'https://api.openai.example.com/v1',
generationConfig: {
samplingParams: { temperature: 0.7, max_tokens: 4096 },
},
},
],
};

const modelsConfig = new ModelsConfig({
initialAuthType: AuthType.USE_OPENAI,
modelProvidersConfig,
});

await modelsConfig.switchModel(AuthType.USE_OPENAI, 'gpt-4');

const gc = currentGenerationConfig(modelsConfig);
// Should preserve both values from provider
expect(gc.samplingParams?.temperature).toBe(0.7);
expect(gc.samplingParams?.max_tokens).toBe(4096);

const sources = modelsConfig.getGenerationConfigSources();
expect(sources['samplingParams']?.kind).toBe('modelProviders');
});

it('should use correct output limit for different model families', async () => {
const modelProvidersConfig: ModelProvidersConfig = {
anthropic: [
{
id: 'claude-3-opus',
name: 'Claude 3 Opus',
baseUrl: 'https://api.anthropic.example.com/v1',
},
],
gemini: [
{
id: 'gemini-pro',
name: 'Gemini Pro',
baseUrl: 'https://api.gemini.example.com/v1',
},
],
};

// Test Claude model (64K output limit)
const claudeConfig = new ModelsConfig({
initialAuthType: AuthType.USE_ANTHROPIC,
modelProvidersConfig,
});

await claudeConfig.switchModel(AuthType.USE_ANTHROPIC, 'claude-3-opus');

let gc = currentGenerationConfig(claudeConfig);
expect(gc.samplingParams?.max_tokens).toBe(65536); // 64K = 2^16

// Test Gemini model (8K output limit)
const geminiConfig = new ModelsConfig({
initialAuthType: AuthType.USE_GEMINI,
modelProvidersConfig,
});

await geminiConfig.switchModel(AuthType.USE_GEMINI, 'gemini-pro');

gc = currentGenerationConfig(geminiConfig);
expect(gc.samplingParams?.max_tokens).toBe(8192);
});
});
});
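Note: the assertions above go through a `currentGenerationConfig` helper defined earlier in the test file, outside the lines shown in this hunk. Purely as a reading aid, it presumably does something along these lines — the accessor name and the `GenerationConfig` type used here are assumptions, not taken from the diff:

```ts
// Hypothetical reading aid only: the real helper is defined above this hunk.
// Assumes ModelsConfig exposes its resolved generation config via an accessor.
function currentGenerationConfig(modelsConfig: ModelsConfig): GenerationConfig {
  return modelsConfig.getGenerationConfig(); // accessor name is an assumption
}
```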
19 changes: 19 additions & 0 deletions packages/core/src/models/modelsConfig.ts
@@ -772,6 +772,25 @@ export class ModelsConfig {
};
}

// max_tokens fallback: auto-detect from model when not set by provider.
// Without this, requests to non-Qwen models (Claude, GPT, etc.) may omit
// max_tokens entirely, causing the API to use a small default (e.g. 4096)
// and truncating long responses mid-tool-call.
if (!this._generationConfig.samplingParams?.max_tokens) {
const outputLimit = tokenLimit(model.id, 'output');
if (!this._generationConfig.samplingParams) {
this._generationConfig.samplingParams = {};
}
this._generationConfig.samplingParams.max_tokens = outputLimit;
const existingSource = this.generationConfigSources['samplingParams'];
this.generationConfigSources['samplingParams'] = {
kind: 'computed',
detail: existingSource
? `max_tokens auto-detected from model (other params from ${existingSource.kind})`
: 'max_tokens auto-detected from model',
};
}

// modalities fallback: auto-detect from model when not set by provider
if (gc.modalities === undefined) {
this._generationConfig.modalities = defaultModalities(model.id);
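For context, the fallback above delegates to `tokenLimit(model.id, 'output')` from tokenLimits.ts, which is the source of truth for the limits. Purely as an illustration of the shape implied by the tests above (16K for GPT-4, 64K for Claude, 8K for Gemini Pro), a sketch might look like this — the pattern matching and the default value are assumptions, not the actual implementation:

```ts
// Hypothetical sketch of an output-limit lookup; tokenLimits.ts is the real
// source of truth. Values mirror the expectations asserted in the tests above.
export function outputTokenLimitSketch(modelId: string): number {
  if (/claude/i.test(modelId)) return 65_536; // 64K, per the Claude 3 Opus case
  if (/gpt-4/i.test(modelId)) return 16_384; // 16K, per the GPT-4 case
  if (/gemini/i.test(modelId)) return 8_192; // 8K, per the Gemini Pro case
  return 4_096; // assumed default for models without a known output limit
}
```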
4 changes: 2 additions & 2 deletions packages/core/src/services/shellExecutionService.test.ts
@@ -124,8 +124,8 @@ const setupConflictingPathEnv = () => {
};

const expectNormalizedWindowsPathEnv = (env: NodeJS.ProcessEnv) => {
expect(env.PATH).toBe(EXPECTED_MERGED_WINDOWS_PATH);
expect(env.Path).toBeUndefined();
expect(env['PATH']).toBe(EXPECTED_MERGED_WINDOWS_PATH);
expect(env['Path']).toBeUndefined();
};

describe('ShellExecutionService', () => {
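The switch from `env.PATH` / `env.Path` to bracket notation here is most likely about TypeScript's `noPropertyAccessFromIndexSignature` compiler option (an assumption; the diff does not state the reason): `NodeJS.ProcessEnv` exposes most variables only through an index signature, and with that flag enabled dot access on such properties is rejected while element access compiles. A minimal illustration:

```ts
// With "noPropertyAccessFromIndexSignature": true in tsconfig.json (assumed here),
// properties that exist only via an index signature must use element access.
declare const env: NodeJS.ProcessEnv;

// const p = env.PATH;   // TS4111 under that flag: must be accessed with ['PATH']
const path = env['PATH']; // compiles regardless of the flag
```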