55 */
66
77import { describe , expect } from 'vitest' ;
8- import { evalTest } from './test-helper.js' ;
8+ import { appEvalTest , AppEvalCase } from './app-test-helper.js' ;
9+ import { EvalPolicy } from './test-helper.js' ;
10+
11+ function askUserEvalTest ( policy : EvalPolicy , evalCase : AppEvalCase ) {
12+ return appEvalTest ( policy , {
13+ ...evalCase ,
14+ configOverrides : {
15+ ...evalCase . configOverrides ,
16+ general : {
17+ ...evalCase . configOverrides ?. general ,
18+ approvalMode : 'default' ,
19+ enableAutoUpdate : false ,
20+ enableAutoUpdateNotification : false ,
21+ } ,
22+ } ,
23+ files : {
24+ ...evalCase . files ,
25+ } ,
26+ } ) ;
27+ }
928
1029describe ( 'ask_user' , ( ) => {
11- evalTest ( 'USUALLY_PASSES' , {
30+ askUserEvalTest ( 'USUALLY_PASSES' , {
1231 name : 'Agent uses AskUser tool to present multiple choice options' ,
1332 prompt : `Use the ask_user tool to ask me what my favorite color is. Provide 3 options: red, green, or blue.` ,
33+ setup : async ( rig ) => {
34+ rig . setBreakpoint ( [ 'ask_user' ] ) ;
35+ } ,
1436 assert : async ( rig ) => {
15- const wasToolCalled = await rig . waitForToolCall ( 'ask_user' ) ;
16- expect ( wasToolCalled , 'Expected ask_user tool to be called' ) . toBe ( true ) ;
37+ const confirmation = await rig . waitForPendingConfirmation ( 'ask_user' ) ;
38+ expect (
39+ confirmation ,
40+ 'Expected a pending confirmation for ask_user tool' ,
41+ ) . toBeDefined ( ) ;
1742 } ,
1843 } ) ;
1944
20- evalTest ( 'USUALLY_PASSES' , {
45+ askUserEvalTest ( 'USUALLY_PASSES' , {
2146 name : 'Agent uses AskUser tool to clarify ambiguous requirements' ,
2247 files : {
2348 'package.json' : JSON . stringify ( { name : 'my-app' , version : '1.0.0' } ) ,
2449 } ,
2550 prompt : `I want to build a new feature in this app. Ask me questions to clarify the requirements before proceeding.` ,
51+ setup : async ( rig ) => {
52+ rig . setBreakpoint ( [ 'ask_user' ] ) ;
53+ } ,
2654 assert : async ( rig ) => {
27- const wasToolCalled = await rig . waitForToolCall ( 'ask_user' ) ;
28- expect ( wasToolCalled , 'Expected ask_user tool to be called' ) . toBe ( true ) ;
55+ const confirmation = await rig . waitForPendingConfirmation ( 'ask_user' ) ;
56+ expect (
57+ confirmation ,
58+ 'Expected a pending confirmation for ask_user tool' ,
59+ ) . toBeDefined ( ) ;
2960 } ,
3061 } ) ;
3162
32- evalTest ( 'USUALLY_PASSES' , {
63+ askUserEvalTest ( 'USUALLY_PASSES' , {
3364 name : 'Agent uses AskUser tool before performing significant ambiguous rework' ,
3465 files : {
3566 'packages/core/src/index.ts' : '// index\nexport const version = "1.0.0";' ,
@@ -39,54 +70,62 @@ describe('ask_user', () => {
3970 } ) ,
4071 'README.md' : '# Gemini CLI' ,
4172 } ,
42- prompt : `Refactor the entire core package to be better.` ,
73+ prompt : `I want to completely rewrite the core package to support the upcoming V2 architecture, but I haven't decided what that looks like yet. We need to figure out the requirements first. Can you ask me some questions to help nail down the design?` ,
74+ setup : async ( rig ) => {
75+ rig . setBreakpoint ( [ 'enter_plan_mode' , 'ask_user' ] ) ;
76+ } ,
4377 assert : async ( rig ) => {
44- const wasPlanModeCalled = await rig . waitForToolCall ( 'enter_plan_mode' ) ;
45- expect ( wasPlanModeCalled , 'Expected enter_plan_mode to be called' ) . toBe (
46- true ,
47- ) ;
78+ // It might call enter_plan_mode first.
79+ let confirmation = await rig . waitForPendingConfirmation ( [
80+ 'enter_plan_mode' ,
81+ 'ask_user' ,
82+ ] ) ;
83+ expect ( confirmation , 'Expected a tool call confirmation' ) . toBeDefined ( ) ;
84+
85+ if ( confirmation ?. name === 'enter_plan_mode' ) {
86+ rig . acceptConfirmation ( 'enter_plan_mode' ) ;
87+ confirmation = await rig . waitForPendingConfirmation ( 'ask_user' ) ;
88+ }
4889
49- const wasAskUserCalled = await rig . waitForToolCall ( 'ask_user' ) ;
5090 expect (
51- wasAskUserCalled ,
52- 'Expected ask_user tool to be called to clarify the significant rework' ,
53- ) . toBe ( true ) ;
91+ confirmation ?. toolName ,
92+ 'Expected ask_user to be called to clarify the significant rework' ,
93+ ) . toBe ( 'ask_user' ) ;
5494 } ,
5595 } ) ;
5696
5797 // --- Regression Tests for Recent Fixes ---
5898
59- // Regression test for issue #20177: Ensure the agent does not use `ask_user` to
99+ // Regression test for issue #20177: Ensure the agent does not use \ `ask_user\ ` to
60100 // confirm shell commands. Fixed via prompt refinements and tool definition
61101 // updates to clarify that shell command confirmation is handled by the UI.
62102 // See fix: https://github.com/google-gemini/gemini-cli/pull/20504
63- evalTest ( 'USUALLY_PASSES' , {
103+ askUserEvalTest ( 'USUALLY_PASSES' , {
64104 name : 'Agent does NOT use AskUser to confirm shell commands' ,
65105 files : {
66106 'package.json' : JSON . stringify ( {
67107 scripts : { build : 'echo building' } ,
68108 } ) ,
69109 } ,
70110 prompt : `Run 'npm run build' in the current directory.` ,
111+ setup : async ( rig ) => {
112+ rig . setBreakpoint ( [ 'run_shell_command' , 'ask_user' ] ) ;
113+ } ,
71114 assert : async ( rig ) => {
72- await rig . waitForTelemetryReady ( ) ;
73-
74- const toolLogs = rig . readToolLogs ( ) ;
75- const wasShellCalled = toolLogs . some (
76- ( log ) => log . toolRequest . name === 'run_shell_command' ,
77- ) ;
78- const wasAskUserCalled = toolLogs . some (
79- ( log ) => log . toolRequest . name === 'ask_user' ,
80- ) ;
115+ const confirmation = await rig . waitForPendingConfirmation ( [
116+ 'run_shell_command' ,
117+ 'ask_user' ,
118+ ] ) ;
81119
82120 expect (
83- wasShellCalled ,
84- 'Expected run_shell_command tool to be called' ,
85- ) . toBe ( true ) ;
121+ confirmation ,
122+ 'Expected a pending confirmation for a tool' ,
123+ ) . toBeDefined ( ) ;
124+
86125 expect (
87- wasAskUserCalled ,
126+ confirmation ?. toolName ,
88127 'ask_user should not be called to confirm shell commands' ,
89- ) . toBe ( false ) ;
128+ ) . toBe ( 'run_shell_command' ) ;
90129 } ,
91130 } ) ;
92131} ) ;
0 commit comments