Skip to content

Commit 9c11ff2

Browse files
authored
test(evals): mark all save_memory evals as USUALLY_PASSES due to unreliability (#18786)
1 parent b3ecac7 commit 9c11ff2

1 file changed

Lines changed: 11 additions & 11 deletions

File tree

evals/save_memory.eval.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import {
1414
describe('save_memory', () => {
1515
const TEST_PREFIX = 'Save memory test: ';
1616
const rememberingFavoriteColor = "Agent remembers user's favorite color";
17-
evalTest('ALWAYS_PASSES', {
17+
evalTest('USUALLY_PASSES', {
1818
name: rememberingFavoriteColor,
1919
params: {
2020
settings: { tools: { core: ['save_memory'] } },
@@ -36,7 +36,7 @@ describe('save_memory', () => {
3636
},
3737
});
3838
const rememberingCommandRestrictions = 'Agent remembers command restrictions';
39-
evalTest('ALWAYS_PASSES', {
39+
evalTest('USUALLY_PASSES', {
4040
name: rememberingCommandRestrictions,
4141
params: {
4242
settings: { tools: { core: ['save_memory'] } },
@@ -57,7 +57,7 @@ describe('save_memory', () => {
5757
});
5858

5959
const rememberingWorkflow = 'Agent remembers workflow preferences';
60-
evalTest('ALWAYS_PASSES', {
60+
evalTest('USUALLY_PASSES', {
6161
name: rememberingWorkflow,
6262
params: {
6363
settings: { tools: { core: ['save_memory'] } },
@@ -79,7 +79,7 @@ describe('save_memory', () => {
7979

8080
const ignoringTemporaryInformation =
8181
'Agent ignores temporary conversation details';
82-
evalTest('ALWAYS_PASSES', {
82+
evalTest('USUALLY_PASSES', {
8383
name: ignoringTemporaryInformation,
8484
params: {
8585
settings: { tools: { core: ['save_memory'] } },
@@ -104,7 +104,7 @@ describe('save_memory', () => {
104104
});
105105

106106
const rememberingPetName = "Agent remembers user's pet's name";
107-
evalTest('ALWAYS_PASSES', {
107+
evalTest('USUALLY_PASSES', {
108108
name: rememberingPetName,
109109
params: {
110110
settings: { tools: { core: ['save_memory'] } },
@@ -125,7 +125,7 @@ describe('save_memory', () => {
125125
});
126126

127127
const rememberingCommandAlias = 'Agent remembers custom command aliases';
128-
evalTest('ALWAYS_PASSES', {
128+
evalTest('USUALLY_PASSES', {
129129
name: rememberingCommandAlias,
130130
params: {
131131
settings: { tools: { core: ['save_memory'] } },
@@ -147,7 +147,7 @@ describe('save_memory', () => {
147147

148148
const ignoringDbSchemaLocation =
149149
"Agent ignores workspace's database schema location";
150-
evalTest('ALWAYS_PASSES', {
150+
evalTest('USUALLY_PASSES', {
151151
name: ignoringDbSchemaLocation,
152152
params: {
153153
settings: {
@@ -178,7 +178,7 @@ describe('save_memory', () => {
178178

179179
const rememberingCodingStyle =
180180
"Agent remembers user's coding style preference";
181-
evalTest('ALWAYS_PASSES', {
181+
evalTest('USUALLY_PASSES', {
182182
name: rememberingCodingStyle,
183183
params: {
184184
settings: { tools: { core: ['save_memory'] } },
@@ -200,7 +200,7 @@ describe('save_memory', () => {
200200

201201
const ignoringBuildArtifactLocation =
202202
'Agent ignores workspace build artifact location';
203-
evalTest('ALWAYS_PASSES', {
203+
evalTest('USUALLY_PASSES', {
204204
name: ignoringBuildArtifactLocation,
205205
params: {
206206
settings: {
@@ -230,7 +230,7 @@ describe('save_memory', () => {
230230
});
231231

232232
const ignoringMainEntryPoint = "Agent ignores workspace's main entry point";
233-
evalTest('ALWAYS_PASSES', {
233+
evalTest('USUALLY_PASSES', {
234234
name: ignoringMainEntryPoint,
235235
params: {
236236
settings: {
@@ -260,7 +260,7 @@ describe('save_memory', () => {
260260
});
261261

262262
const rememberingBirthday = "Agent remembers user's birthday";
263-
evalTest('ALWAYS_PASSES', {
263+
evalTest('USUALLY_PASSES', {
264264
name: rememberingBirthday,
265265
params: {
266266
settings: { tools: { core: ['save_memory'] } },

0 commit comments

Comments (0)