Skip to content

Commit fd307ef

Browse files
committed
feat: move shell efficiency guidelines to tool description
- Relocates Quiet Flags and Pagination instructions from the general system prompt to the 'run_shell_command' tool declaration.
- Ensures efficiency guidelines are provided contextually when the model considers shell tool use.
- Retains guidelines in legacy snippets for backward compatibility with older models.
- Adds comprehensive behavioral evaluations in 'evals/shell-efficiency.eval.ts' to verify adherence to efficiency best practices and respect for the 'enableShellOutputEfficiency' setting.
1 parent 8cbe851 commit fd307ef

8 files changed

Lines changed: 159 additions & 91 deletions

File tree

evals/shell-efficiency.eval.ts

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import { describe, expect } from 'vitest';
8+
import { evalTest } from './test-helper.js';
9+
10+
describe('Shell Efficiency', () => {
11+
evalTest('ALWAYS_PASSES', {
12+
name: 'should use --silent/--quiet flags when installing packages',
13+
prompt: 'Install the "lodash" package using npm.',
14+
assert: async (rig) => {
15+
const toolCalls = rig.readToolLogs();
16+
const shellCalls = toolCalls.filter(
17+
(call) => call.toolRequest.name === 'run_shell_command',
18+
);
19+
20+
const hasEfficiencyFlag = shellCalls.some((call) => {
21+
let args = call.toolRequest.args;
22+
if (typeof args === 'string') {
23+
try {
24+
args = JSON.parse(args);
25+
} catch (e) {
26+
// Ignore parse errors
27+
}
28+
}
29+
const cmd = typeof args === 'string' ? args : (args as any)['command'];
30+
return (
31+
cmd &&
32+
cmd.includes('npm install') &&
33+
(cmd.includes('--silent') ||
34+
cmd.includes('--quiet') ||
35+
cmd.includes('-q'))
36+
);
37+
});
38+
39+
expect(
40+
hasEfficiencyFlag,
41+
`Expected agent to use efficiency flags for npm install. Commands used: ${shellCalls
42+
.map((c) => (c.toolRequest.args as any)['command'])
43+
.join(', ')}`,
44+
).toBe(true);
45+
},
46+
});
47+
48+
evalTest('ALWAYS_PASSES', {
49+
name: 'should use --no-pager with git commands',
50+
prompt: 'Show the git log.',
51+
assert: async (rig) => {
52+
const toolCalls = rig.readToolLogs();
53+
const shellCalls = toolCalls.filter(
54+
(call) => call.toolRequest.name === 'run_shell_command',
55+
);
56+
57+
const hasNoPager = shellCalls.some((call) => {
58+
let args = call.toolRequest.args;
59+
if (typeof args === 'string') {
60+
try {
61+
args = JSON.parse(args);
62+
} catch (e) {
63+
// Ignore parse errors
64+
}
65+
}
66+
const cmd = typeof args === 'string' ? args : (args as any)['command'];
67+
return cmd && cmd.includes('git') && cmd.includes('--no-pager');
68+
});
69+
70+
expect(
71+
hasNoPager,
72+
`Expected agent to use --no-pager with git. Commands used: ${shellCalls
73+
.map((c) => (c.toolRequest.args as any)['command'])
74+
.join(', ')}`,
75+
).toBe(true);
76+
},
77+
});
78+
79+
evalTest('ALWAYS_PASSES', {
80+
name: 'should NOT use efficiency flags when enableShellOutputEfficiency is disabled',
81+
params: {
82+
settings: {
83+
tools: {
84+
shell: {
85+
enableShellOutputEfficiency: false,
86+
},
87+
},
88+
},
89+
},
90+
prompt: 'Install the "lodash" package using npm.',
91+
assert: async (rig) => {
92+
const toolCalls = rig.readToolLogs();
93+
const shellCalls = toolCalls.filter(
94+
(call) => call.toolRequest.name === 'run_shell_command',
95+
);
96+
97+
const hasEfficiencyFlag = shellCalls.some((call) => {
98+
let args = call.toolRequest.args;
99+
if (typeof args === 'string') {
100+
try {
101+
args = JSON.parse(args);
102+
} catch (e) {
103+
// Ignore parse errors
104+
}
105+
}
106+
const cmd = typeof args === 'string' ? args : (args as any)['command'];
107+
return (
108+
cmd &&
109+
cmd.includes('npm install') &&
110+
(cmd.includes('--silent') ||
111+
cmd.includes('--quiet') ||
112+
cmd.includes('-q'))
113+
);
114+
});
115+
116+
expect(
117+
hasEfficiencyFlag,
118+
'Agent used efficiency flags even though enableShellOutputEfficiency was disabled',
119+
).toBe(false);
120+
},
121+
});
122+
});

packages/core/src/core/__snapshots__/prompts.test.ts.snap

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -592,11 +592,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
592592
593593
# Operational Guidelines
594594
595-
## Shell Tool Efficiency
596-
597-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
598-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
599-
600595
## Tone and Style
601596
602597
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -706,11 +701,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
706701
707702
# Operational Guidelines
708703
709-
## Shell Tool Efficiency
710-
711-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
712-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
713-
714704
## Tone and Style
715705
716706
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -803,11 +793,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
803793
804794
# Operational Guidelines
805795
806-
## Shell Tool Efficiency
807-
808-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
809-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
810-
811796
## Tone and Style
812797
813798
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1391,11 +1376,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
13911376
13921377
# Operational Guidelines
13931378
1394-
## Shell Tool Efficiency
1395-
1396-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
1397-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
1398-
13991379
## Tone and Style
14001380
14011381
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1514,11 +1494,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
15141494
15151495
# Operational Guidelines
15161496
1517-
## Shell Tool Efficiency
1518-
1519-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
1520-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
1521-
15221497
## Tone and Style
15231498
15241499
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1637,11 +1612,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
16371612
16381613
# Operational Guidelines
16391614
1640-
## Shell Tool Efficiency
1641-
1642-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
1643-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
1644-
16451615
## Tone and Style
16461616
16471617
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -1868,11 +1838,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
18681838
18691839
# Operational Guidelines
18701840
1871-
## Shell Tool Efficiency
1872-
1873-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
1874-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
1875-
18761841
## Tone and Style
18771842
18781843
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2099,11 +2064,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
20992064
21002065
# Operational Guidelines
21012066
2102-
## Shell Tool Efficiency
2103-
2104-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
2105-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
2106-
21072067
## Tone and Style
21082068
21092069
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2218,11 +2178,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
22182178
22192179
# Operational Guidelines
22202180
2221-
## Shell Tool Efficiency
2222-
2223-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
2224-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
2225-
22262181
## Tone and Style
22272182
22282183
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2448,11 +2403,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
24482403
24492404
# Operational Guidelines
24502405
2451-
## Shell Tool Efficiency
2452-
2453-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
2454-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
2455-
24562406
## Tone and Style
24572407
24582408
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -2567,11 +2517,6 @@ Operate using a **Research -> Strategy -> Execution** lifecycle. For the Executi
25672517
25682518
# Operational Guidelines
25692519
2570-
## Shell Tool Efficiency
2571-
2572-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
2573-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
2574-
25752520
## Tone and Style
25762521
25772522
- **Role:** A senior software engineer and collaborative peer programmer.

packages/core/src/core/prompts.test.ts

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -463,26 +463,6 @@ describe('Core System Prompt (prompts.ts)', () => {
463463
});
464464

465465
describe('Platform-specific and Background Process instructions', () => {
466-
it('should include Windows-specific shell efficiency commands on win32', () => {
467-
mockPlatform('win32');
468-
const prompt = getCoreSystemPrompt(mockConfig);
469-
expect(prompt).toContain(
470-
"using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)",
471-
);
472-
expect(prompt).not.toContain(
473-
"using commands like 'grep', 'tail', 'head'",
474-
);
475-
});
476-
477-
it('should include generic shell efficiency commands on non-Windows', () => {
478-
mockPlatform('linux');
479-
const prompt = getCoreSystemPrompt(mockConfig);
480-
expect(prompt).toContain("using commands like 'grep', 'tail', 'head'");
481-
expect(prompt).not.toContain(
482-
"using commands like 'type' or 'findstr' (on CMD) and 'Get-Content' or 'Select-String' (on PowerShell)",
483-
);
484-
});
485-
486466
it('should use is_background parameter in background process instructions', () => {
487467
const prompt = getCoreSystemPrompt(mockConfig);
488468
expect(prompt).toContain(

packages/core/src/prompts/snippets.legacy.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ export function renderOperationalGuidelines(
245245
if (!options) return '';
246246
return `
247247
# Operational Guidelines
248+
248249
${shellEfficiencyGuidelines(options.enableShellEfficiency)}
249250
250251
## Tone and Style (CLI Interaction)

packages/core/src/prompts/snippets.ts

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ export interface PrimaryWorkflowsOptions {
5555
export interface OperationalGuidelinesOptions {
5656
interactive: boolean;
5757
isGemini3: boolean;
58-
enableShellEfficiency: boolean;
5958
interactiveShellEnabled: boolean;
6059
}
6160

@@ -259,8 +258,6 @@ export function renderOperationalGuidelines(
259258
return `
260259
# Operational Guidelines
261260
262-
${shellEfficiencyGuidelines(options.enableShellEfficiency)}
263-
264261
## Tone and Style
265262
266263
- **Role:** A senior software engineer and collaborative peer programmer.
@@ -517,15 +514,6 @@ function planningPhaseSuggestion(options: PrimaryWorkflowsOptions): string {
517514
return '';
518515
}
519516

520-
function shellEfficiencyGuidelines(enabled: boolean): string {
521-
if (!enabled) return '';
522-
return `
523-
## Shell Tool Efficiency
524-
525-
- **Quiet Flags:** Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
526-
- **Pagination:** Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).`;
527-
}
528-
529517
function toneAndStyleNoChitchat(isGemini3: boolean): string {
530518
return isGemini3
531519
? `

packages/core/src/tools/__snapshots__/shell.test.ts.snap

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
exports[`ShellTool > getDescription > should return the non-windows description when not on windows 1`] = `
44
"This tool executes a given shell command as \`bash -c <command>\`. Command can start background processes using \`&\`. Command is executed as a subprocess that leads its own process group. Command process group can be terminated as \`kill -- -PGID\` or signaled as \`kill -s SIGNAL -- -PGID\`.
55
6+
Efficiency Guidelines:
7+
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
8+
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
9+
610
The following information is returned:
711
812
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.
@@ -16,6 +20,10 @@ exports[`ShellTool > getDescription > should return the non-windows description
1620
exports[`ShellTool > getDescription > should return the windows description when on windows 1`] = `
1721
"This tool executes a given shell command as \`powershell.exe -NoProfile -Command <command>\`. Command can start background processes using PowerShell constructs such as \`Start-Process -NoNewWindow\` or \`Start-Job\`.
1822
23+
Efficiency Guidelines:
24+
- Quiet Flags: Always prefer silent or quiet flags (e.g., \`npm install --silent\`, \`git --no-pager\`) to reduce output volume while still capturing necessary information.
25+
- Pagination: Always disable terminal pagination to ensure commands terminate (e.g., use \`git --no-pager\`, \`systemctl --no-pager\`, or set \`PAGER=cat\`).
26+
1927
The following information is returned:
2028
2129
Output: Combined stdout/stderr. Can be \`(empty)\` or partial on error and for any unwaited background processes.

packages/core/src/tools/shell.test.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ describe('ShellTool', () => {
130130
getGeminiClient: vi.fn().mockReturnValue({}),
131131
getShellToolInactivityTimeout: vi.fn().mockReturnValue(1000),
132132
getEnableInteractiveShell: vi.fn().mockReturnValue(false),
133+
getEnableShellOutputEfficiency: vi.fn().mockReturnValue(true),
133134
sanitizationConfig: {},
134135
} as unknown as Config;
135136

@@ -633,6 +634,15 @@ describe('ShellTool', () => {
633634
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
634635
expect(shellTool.description).toMatchSnapshot();
635636
});
637+
638+
it('should not include efficiency guidelines when disabled', () => {
639+
mockPlatform.mockReturnValue('linux');
640+
vi.mocked(mockConfig.getEnableShellOutputEfficiency).mockReturnValue(
641+
false,
642+
);
643+
const shellTool = new ShellTool(mockConfig, createMockMessageBus());
644+
expect(shellTool.description).not.toContain('Efficiency Guidelines:');
645+
});
636646
});
637647

638648
describe('llmContent output format', () => {

0 commit comments

Comments
 (0)