|
8 | 8 | from agentworld.research import ( |
9 | 9 | format_experiment_manifest_for_prompt, |
10 | 10 | format_hypothesis_manifest_for_prompt, |
| 11 | + validate_experiment_execution, |
11 | 12 | validate_experiment_manifest, |
12 | 13 | write_experiment_manifest, |
13 | 14 | write_hypothesis_manifest, |
@@ -162,6 +163,55 @@ def test_write_experiment_manifest_collects_schema_and_context(self) -> None: |
162 | 163 | self.assertEqual(schema["row_count"], 2) |
163 | 164 | self.assertIn("Result Artifacts", format_experiment_manifest_for_prompt(manifest)) |
164 | 165 |
|
def test_validate_experiment_execution_rejects_blocked_results(self) -> None:
    """A results.json that reports a blocked run yields one problem per signal.

    The payload marks the run as not executed, sets the status to
    ``blocked`` and carries an ``execution_blocker`` entry; each of those
    three conditions must show up in the returned problem list.
    """
    blocked_payload = {
        "experiments_executed": False,
        "execution_status": "blocked",
        "execution_blocker": {"reason": "permission approval required"},
    }
    expected_messages = (
        "results.json reports experiments_executed=false.",
        "results.json reports execution_status=blocked.",
        "results.json contains execution_blocker; the experiment did not complete.",
    )

    with tempfile.TemporaryDirectory() as scratch_dir:
        run = create_run_workspace(
            runs_dir=Path(scratch_dir),
            run_id="blocked",
            goal="test",
        )
        results_path = run.results_dir / "results.json"
        serialized = json.dumps(blocked_payload, indent=2, ensure_ascii=True)
        write_text(results_path, serialized)

        reported = validate_experiment_execution(run)

        # Membership only: the validator may report additional problems.
        for message in expected_messages:
            self.assertIn(message, reported)
| 187 | + |
def test_validate_experiment_execution_checks_declared_outputs(self) -> None:
    """A completed run is flagged only for declared outputs that are absent.

    ``metrics.json`` is written into the workspace results directory while
    no figure file is created, so the single expected problem names the
    missing figure path.
    """
    completed_payload = {
        "experiments_executed": True,
        "execution_status": "completed",
        "expected_outputs_on_success": {
            "metrics": "workspace/results/metrics.json",
            "figure": "workspace/figures/missing.png",
        },
    }
    expected_problems = [
        "results.json expected output(s) are missing: workspace/figures/missing.png",
    ]

    with tempfile.TemporaryDirectory() as scratch_dir:
        run = create_run_workspace(
            runs_dir=Path(scratch_dir),
            run_id="outputs",
            goal="test",
        )
        results_dir = run.results_dir
        # Presumably this is the file the declared "metrics" path resolves
        # to -- confirm against the validator's path handling.
        write_text(results_dir / "metrics.json", "{}\n")
        write_text(
            results_dir / "results.json",
            json.dumps(completed_payload, indent=2, ensure_ascii=True),
        )

        reported = validate_experiment_execution(run)

        # Exact equality: the missing figure must be the only problem.
        self.assertEqual(reported, expected_problems)
| 214 | + |
165 | 215 |
|
166 | 216 | if __name__ == "__main__": |
167 | 217 | unittest.main() |
0 commit comments