fix(core): prevent subagent bypass in plan mode (#18484)

jerop · web-flow · commit e3796d137afb · 2026-02-06T22:55:00.000Z
diff --git a/packages/cli/src/config/policy-engine.integration.test.ts b/packages/cli/src/config/policy-engine.integration.test.ts
@@ -434,8 +434,8 @@ describe('Policy Engine Integration Tests', () => {
       expect(mcpServerRule?.priority).toBe(2.1); // MCP allowed server
 
       const readOnlyToolRule = rules.find((r) => r.toolName === 'glob');
-      // Priority 50 in default tier → 1.05
-      expect(readOnlyToolRule?.priority).toBeCloseTo(1.05, 5);
+      // Priority 70 in default tier → 1.07 (Overriding Plan Mode Deny)
+      expect(readOnlyToolRule?.priority).toBeCloseTo(1.07, 5);
 
       // Verify the engine applies these priorities correctly
       expect(
@@ -590,8 +590,8 @@ describe('Policy Engine Integration Tests', () => {
       expect(server1Rule?.priority).toBe(2.1); // Allowed servers (user tier)
 
       const globRule = rules.find((r) => r.toolName === 'glob');
-      // Priority 50 in default tier → 1.05
-      expect(globRule?.priority).toBeCloseTo(1.05, 5); // Auto-accept read-only
+      // Priority 70 in default tier → 1.07
+      expect(globRule?.priority).toBeCloseTo(1.07, 5); // Auto-accept read-only
 
       // The PolicyEngine will sort these by priority when it's created
       const engine = new PolicyEngine(config);
diff --git a/packages/core/src/agents/registry.ts b/packages/core/src/agents/registry.ts
@@ -21,7 +21,7 @@ import {
   type ModelConfig,
   ModelConfigService,
 } from '../services/modelConfigService.js';
-import { PolicyDecision } from '../policy/types.js';
+import { PolicyDecision, PRIORITY_SUBAGENT_TOOL } from '../policy/types.js';
 
 /**
  * Returns the model config alias for a given agent definition.
@@ -297,7 +297,7 @@ export class AgentRegistry {
         definition.kind === 'local'
           ? PolicyDecision.ALLOW
           : PolicyDecision.ASK_USER,
-      priority: 1.05,
+      priority: PRIORITY_SUBAGENT_TOOL,
       source: 'AgentRegistry (Dynamic)',
     });
   }
diff --git a/packages/core/src/policy/config.ts b/packages/core/src/policy/config.ts
@@ -194,6 +194,8 @@ export async function createPolicyEngineConfig(
   //   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
   //   15: Auto-edit tool override (becomes 1.015 in default tier)
   //   50: Read-only tools (becomes 1.050 in default tier)
+  //   60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
+  //   70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
   //   999: YOLO mode allow-all (becomes 1.999 in default tier)
 
   // MCP servers that are explicitly excluded in settings.mcp.excluded
diff --git a/packages/core/src/policy/policies/plan.toml b/packages/core/src/policy/policies/plan.toml
@@ -21,66 +21,36 @@
 #
 # TOML policy priorities (before transformation):
 #   10: Write tools default to ASK_USER (becomes 1.010 in default tier)
-#   20: Plan mode catch-all DENY override (becomes 1.020 in default tier)
-#   50: Read-only tools (becomes 1.050 in default tier)
+#   60: Plan mode catch-all DENY override (becomes 1.060 in default tier)
+#   70: Plan mode explicit ALLOW override (becomes 1.070 in default tier)
 #   999: YOLO mode allow-all (becomes 1.999 in default tier)
 
 # Catch-All: Deny everything by default in Plan mode.
 
 [[rule]]
 decision = "deny"
-priority = 20
+priority = 60
 modes = ["plan"]
 deny_message = "You are in Plan Mode - adjust your prompt to only use read and search tools."
 
 # Explicitly Allow Read-Only Tools in Plan mode.
 
 [[rule]]
-toolName = "glob"
+toolName = ["glob", "grep_search", "list_directory", "read_file", "google_web_search"]
 decision = "allow"
-priority = 50
+priority = 70
 modes = ["plan"]
 
 [[rule]]
-toolName = "grep_search"
-decision = "allow"
-priority = 50
-modes = ["plan"]
-
-[[rule]]
-toolName = "list_directory"
-decision = "allow"
-priority = 50
-modes = ["plan"]
-
-[[rule]]
-toolName = "read_file"
-decision = "allow"
-priority = 50
-modes = ["plan"]
-
-[[rule]]
-toolName = "google_web_search"
-decision = "allow"
-priority = 50
-modes = ["plan"]
-
-[[rule]]
-toolName = "ask_user"
-decision = "ask_user"
-priority = 50
-modes = ["plan"]
-
-[[rule]]
-toolName = "exit_plan_mode"
+toolName = ["ask_user", "exit_plan_mode"]
 decision = "ask_user"
-priority = 50
+priority = 70
 modes = ["plan"]
 
 # Allow write_file and replace for .md files in plans directory
 [[rule]]
 toolName = ["write_file", "replace"]
 decision = "allow"
-priority = 50
+priority = 70
 modes = ["plan"]
 argsPattern = "\"file_path\":\"[^\"]+/\\.gemini/tmp/[a-zA-Z0-9_-]+/plans/[a-zA-Z0-9_-]+\\.md\""
diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts
@@ -13,6 +13,7 @@ import {
   type SafetyCheckerRule,
   InProcessCheckerType,
   ApprovalMode,
+  PRIORITY_SUBAGENT_TOOL,
 } from './types.js';
 import type { FunctionCall } from '@google/genai';
 import { SafetyCheckDecision } from '../safety/protocol.js';
@@ -1481,6 +1482,37 @@ describe('PolicyEngine', () => {
     });
   });
 
+  describe('Plan Mode vs Subagent Priority (Regression)', () => {
+    it('should DENY subagents in Plan Mode despite dynamic allow rules', async () => {
+      // Plan Mode Deny (1.06) > Subagent Allow (1.05)
+
+      const fixedRules: PolicyRule[] = [
+        {
+          decision: PolicyDecision.DENY,
+          priority: 1.06,
+          modes: [ApprovalMode.PLAN],
+        },
+        {
+          toolName: 'codebase_investigator',
+          decision: PolicyDecision.ALLOW,
+          priority: PRIORITY_SUBAGENT_TOOL,
+        },
+      ];
+
+      const fixedEngine = new PolicyEngine({
+        rules: fixedRules,
+        approvalMode: ApprovalMode.PLAN,
+      });
+
+      const fixedResult = await fixedEngine.check(
+        { name: 'codebase_investigator' },
+        undefined,
+      );
+
+      expect(fixedResult.decision).toBe(PolicyDecision.DENY);
+    });
+  });
+
   describe('shell command parsing failure', () => {
     it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => {
       const { splitCommands } = await import('../utils/shell-utils.js');
diff --git a/packages/core/src/policy/toml-loader.test.ts b/packages/core/src/policy/toml-loader.test.ts
@@ -5,12 +5,21 @@
  */
 
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
-import { PolicyDecision } from './types.js';
+import {
+  PolicyDecision,
+  ApprovalMode,
+  PRIORITY_SUBAGENT_TOOL,
+} from './types.js';
 import * as fs from 'node:fs/promises';
 import * as path from 'node:path';
 import * as os from 'node:os';
+import { fileURLToPath } from 'node:url';
 import { loadPoliciesFromToml } from './toml-loader.js';
 import type { PolicyLoadResult } from './toml-loader.js';
+import { PolicyEngine } from './policy-engine.js';
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
 
 describe('policy-toml-loader', () => {
   let tempDir: string;
@@ -500,4 +509,60 @@ priority = 100
       expect(error.message).toContain('Failed to read policy directory');
     });
   });
+
+  describe('Built-in Plan Mode Policy', () => {
+    it('should override default subagent rules when in Plan Mode', async () => {
+      const planTomlPath = path.resolve(__dirname, 'policies', 'plan.toml');
+      const fileContent = await fs.readFile(planTomlPath, 'utf-8');
+      const tempPolicyDir = await fs.mkdtemp(
+        path.join(os.tmpdir(), 'plan-policy-test-'),
+      );
+      try {
+        await fs.writeFile(path.join(tempPolicyDir, 'plan.toml'), fileContent);
+        const getPolicyTier = () => 1; // Default tier
+
+        // 1. Load the actual Plan Mode policies
+        const result = await loadPoliciesFromToml(
+          [tempPolicyDir],
+          getPolicyTier,
+        );
+
+        // 2. Initialize Policy Engine with these rules
+        const engine = new PolicyEngine({
+          rules: result.rules,
+          approvalMode: ApprovalMode.PLAN,
+        });
+
+        // 3. Simulate a Subagent being registered (Dynamic Rule)
+        engine.addRule({
+          toolName: 'codebase_investigator',
+          decision: PolicyDecision.ALLOW,
+          priority: PRIORITY_SUBAGENT_TOOL,
+          source: 'AgentRegistry (Dynamic)',
+        });
+
+        // 4. Verify Behavior:
+        // The Plan Mode "Catch-All Deny" (from plan.toml) should override the Subagent Allow
+        const checkResult = await engine.check(
+          { name: 'codebase_investigator' },
+          undefined,
+        );
+
+        expect(
+          checkResult.decision,
+          'Subagent should be DENIED in Plan Mode',
+        ).toBe(PolicyDecision.DENY);
+
+        // 5. Verify Explicit Allows still work
+        // e.g. 'read_file' should be allowed because its priority in plan.toml (70) is higher than the deny (60)
+        const readResult = await engine.check({ name: 'read_file' }, undefined);
+        expect(
+          readResult.decision,
+          'Explicitly allowed tools (read_file) should be ALLOWED in Plan Mode',
+        ).toBe(PolicyDecision.ALLOW);
+      } finally {
+        await fs.rm(tempPolicyDir, { recursive: true, force: true });
+      }
+    });
+  });
 });
diff --git a/packages/core/src/policy/types.ts b/packages/core/src/policy/types.ts
@@ -276,3 +276,9 @@ export interface CheckResult {
   decision: PolicyDecision;
   rule?: PolicyRule;
 }
+
+/**
+ * Priority for subagent tools (registered dynamically).
+ * Effective priority matching Tier 1 (Default) read-only tools.
+ */
+export const PRIORITY_SUBAGENT_TOOL = 1.05;