Skip to content

Commit 1bec83e

Browse files
committed
test(core): unpin evals while keeping integration tests pinned
- Removes hardcoded model from TestRig for unpinned evaluations.
- Pins integration tests to gemini-2.5-pro via new GEMINI_TEST_TYPE=integration env var.
- Moves shell efficiency evals to USUALLY_PASSES to prevent PR blocking while tracking Gemini 3.
1 parent 4494f9e commit 1bec83e

3 files changed

Lines changed: 13 additions & 6 deletions

File tree

evals/shell-efficiency.eval.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ describe('Shell Efficiency', () => {
2020
return typeof args === 'string' ? args : (args as any)['command'];
2121
};
2222

23-
evalTest('ALWAYS_PASSES', {
23+
evalTest('USUALLY_PASSES', {
2424
name: 'should use --silent/--quiet flags when installing packages',
2525
prompt: 'Install the "lodash" package using npm.',
2626
assert: async (rig) => {
@@ -49,7 +49,7 @@ describe('Shell Efficiency', () => {
4949
},
5050
});
5151

52-
evalTest('ALWAYS_PASSES', {
52+
evalTest('USUALLY_PASSES', {
5353
name: 'should use --no-pager with git commands',
5454
prompt: 'Show the git log.',
5555
assert: async (rig) => {
@@ -72,7 +72,7 @@ describe('Shell Efficiency', () => {
7272
},
7373
});
7474

75-
evalTest('ALWAYS_PASSES', {
75+
evalTest('USUALLY_PASSES', {
7676
name: 'should NOT use efficiency flags when enableShellOutputEfficiency is disabled',
7777
params: {
7878
settings: {

integration-tests/vitest.config.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,8 @@ export default defineConfig({
2020
maxThreads: 16,
2121
},
2222
},
23+
env: {
24+
GEMINI_TEST_TYPE: 'integration',
25+
},
2326
},
2427
});

packages/test-utils/src/test-rig.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,13 @@ export class TestRig {
408408
ui: {
409409
useAlternateBuffer: true,
410410
},
411-
model: {
412-
name: DEFAULT_GEMINI_MODEL,
413-
},
411+
...(env['GEMINI_TEST_TYPE'] === 'integration'
412+
? {
413+
model: {
414+
name: DEFAULT_GEMINI_MODEL,
415+
},
416+
}
417+
: {}),
414418
sandbox:
415419
env['GEMINI_SANDBOX'] !== 'false' ? env['GEMINI_SANDBOX'] : false,
416420
// Don't show the IDE connection dialog when running from VsCode

0 commit comments

Comments
 (0)