Skip to content

Commit 6ddf758

Browse files
committed
feat(core): Enable generalist agent
1 parent 99fa700 commit 6ddf758

6 files changed

Lines changed: 503 additions & 9 deletions

File tree

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
/**
2+
* @license
3+
* Copyright 2026 Google LLC
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
import { describe, expect } from 'vitest';
8+
import { appEvalTest } from './app-test-helper.js';
9+
10+
/**
 * Evals for delegation to the `generalist` agent.
 *
 * Positive evals expect a pending confirmation for the `generalist` tool when
 * the prompt describes a batch task; negative evals expect that no intercepted
 * tool call is named `generalist` when the prompt describes a small, direct
 * task.
 */
describe('generalist_delegation', () => {
  /** Timeout, in milliseconds, passed to every rig wait in this suite. */
  const RIG_TIMEOUT_MS = 60000;

  /**
   * Builds the config overrides used by every eval in this suite: the
   * generalist agent is enabled, the experimental agents flag is on, and
   * `run_shell_command` is excluded from the available tools.
   *
   * A new object is returned on each call so that no two evals share (and
   * possibly mutate) the same config instance.
   */
  const generalistConfig = () => ({
    agents: {
      overrides: {
        generalist: { enabled: true },
      },
    },
    model: 'gemini-3-flash-preview',
    experimental: {
      enableAgents: true,
    },
    excludeTools: ['run_shell_command'],
  });

  // --- Positive Evals (Should Delegate) ---

  appEvalTest('ALWAYS_PASSES', {
    name: 'should delegate batch error fixing to generalist agent',
    configOverrides: generalistConfig(),
    // file1.ts .. file10.ts, each holding the same statement without a
    // trailing semicolon.
    files: Object.fromEntries(
      Array.from({ length: 10 }, (_, index) => [
        `file${index + 1}.ts`,
        'console.log("no semi")',
      ]),
    ),
    prompt:
      'I have 10 files (file1.ts to file10.ts) that are missing semicolons. Can you fix them?',
    setup: async (rig) => {
      // Only the generalist tool is registered as a breakpoint here.
      rig.setBreakpoint(['generalist']);
    },
    assert: async (rig) => {
      const confirmation = await rig.waitForPendingConfirmation(
        'generalist',
        RIG_TIMEOUT_MS,
      );
      expect(
        confirmation,
        'Expected a tool call for generalist agent',
      ).toBeTruthy();
      // Resolve the pending call, then wait for the rig to go idle.
      await rig.resolveTool(confirmation);
      await rig.waitForIdle(RIG_TIMEOUT_MS);
    },
  });

  appEvalTest('ALWAYS_PASSES', {
    name: 'should autonomously delegate complex batch task to generalist agent',
    configOverrides: generalistConfig(),
    files: {
      'src/a.ts': 'export const a = 1;',
      'src/b.ts': 'export const b = 2;',
      'src/c.ts': 'export const c = 3;',
      'src/d.ts': 'export const d = 4;',
      'src/e.ts': 'export const e = 5;',
    },
    prompt:
      'Please update all files in the src directory. For each file, add a comment at the top that says "Processed by Gemini".',
    setup: async (rig) => {
      rig.setBreakpoint(['generalist']);
    },
    assert: async (rig) => {
      const confirmation = await rig.waitForPendingConfirmation(
        'generalist',
        RIG_TIMEOUT_MS,
      );
      expect(
        confirmation,
        'Expected autonomous delegation to generalist for batch task',
      ).toBeTruthy();
      await rig.resolveTool(confirmation);
      await rig.waitForIdle(RIG_TIMEOUT_MS);
    },
  });

  // --- Negative Evals (Should NOT Delegate - Assertive Handling) ---

  appEvalTest('ALWAYS_PASSES', {
    name: 'should NOT delegate simple read and fix to generalist agent',
    configOverrides: generalistConfig(),
    files: {
      'README.md': 'This is a proyect.',
    },
    prompt:
      'There is a typo in README.md ("proyect"). Please fix it to "project".',
    setup: async (rig) => {
      // Break on everything to see what it calls
      rig.setBreakpoint(['*']);
    },
    assert: async (rig) => {
      // Every confirmation handed to the callback must be for a tool other
      // than the generalist agent.
      await rig.drainBreakpointsUntilIdle((confirmation) => {
        expect(
          confirmation.toolName,
          'Agent should NOT have delegated to generalist.',
        ).not.toBe('generalist');
      });

      // NOTE(review): the prompt itself contains "project"; if the static
      // output echoes the prompt, this check passes trivially — confirm, and
      // consider asserting on the resulting README.md content instead.
      const output = rig.getStaticOutput();
      expect(output).toMatch(/project/i);
    },
  });

  appEvalTest('ALWAYS_PASSES', {
    name: 'should NOT delegate simple direct question to generalist agent',
    configOverrides: generalistConfig(),
    files: {
      'src/VERSION': '1.2.3',
    },
    prompt: 'Can you tell me the version number in the src folder?',
    setup: async (rig) => {
      rig.setBreakpoint(['*']);
    },
    assert: async (rig) => {
      await rig.drainBreakpointsUntilIdle((confirmation) => {
        expect(
          confirmation.toolName,
          'Agent should NOT have delegated to generalist.',
        ).not.toBe('generalist');
      });

      // The version string appears only in the fixture file, not the prompt.
      const output = rig.getStaticOutput();
      expect(output).toMatch(/1\.2\.3/);
    },
  });
});

0 commit comments

Comments
 (0)