Skip to content

Commit d2deabb

Browse files
authored
[OPIK-5757] [FE][SDK] Link experiment configuration to agent config versions (#6302)
* [OPIK-5757] [FE][SDK] Link experiment configuration to agent config versions

  When evaluate() or run_tests() is called with blueprint_id, the agent configuration reference (_blueprint_id + blueprint_version) is stored in experiment metadata. The experiment Configuration tab renders the version as a clickable link to the agent configuration page.

* [OPIK-5757] [FE][SDK] Address PR review feedback
  - Fix catch block in TS SDK to capture error details
  - Guard against empty project_id in ConfigurationTab link data
  - Add unit test for blueprint fetch failure graceful degradation

* [OPIK-5757] [FE][SDK] Address reviewer feedback: tests, types, cleanup
  - Add happy path + failure unit test for Python _merge_blueprint_into_config
  - Add blueprint metadata tests for TS SDK evaluate()
  - Narrow agent_config type from Dict[str, Any] to Dict[str, str]
  - Extract AgentConfigLinkData type import in ConfigurationTab
1 parent 4fc7d10 commit d2deabb

7 files changed

Lines changed: 235 additions & 5 deletions

File tree

apps/opik-frontend/src/v2/pages-shared/experiments/CompareExperimentsConfigCell/CompareExperimentsConfigCell.tsx

Lines changed: 33 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import React from "react";
22
import isUndefined from "lodash/isUndefined";
33
import { CellContext } from "@tanstack/react-table";
4+
import { Link, useParams } from "@tanstack/react-router";
45
import CellWrapper from "@/shared/DataTableCells/CellWrapper";
56
import { ROW_HEIGHT } from "@/types/shared";
67
import TextDiff from "@/shared/CodeDiff/TextDiff";
@@ -16,26 +17,57 @@ export type CompareConfig = {
1617
different: boolean;
1718
};
1819

20+
export type AgentConfigLinkData = {
21+
projectId: string;
22+
blueprintId: string;
23+
};
24+
1925
type CustomMeta = {
2026
onlyDiff: boolean;
27+
agentConfigLinkData?: Record<string, AgentConfigLinkData>;
2128
};
2229

2330
const CompareExperimentsConfigCell: React.FC<
2431
CellContext<CompareConfig, unknown>
2532
> = (context) => {
2633
const { custom } = context.column.columnDef.meta ?? {};
27-
const { onlyDiff } = (custom ?? {}) as CustomMeta;
34+
const { onlyDiff, agentConfigLinkData } = (custom ?? {}) as CustomMeta;
2835
const experimentId = context.column?.id;
2936
const compareConfig = context.row.original;
37+
const { workspaceName } = useParams({ strict: false }) as {
38+
workspaceName?: string;
39+
};
3040

3141
const data = compareConfig.data[experimentId];
3242
const baseData = compareConfig.data[compareConfig.base];
43+
const linkData =
44+
compareConfig.name === "agent_configuration"
45+
? agentConfigLinkData?.[experimentId]
46+
: undefined;
3347

3448
const renderContent = () => {
3549
if (isUndefined(data)) {
3650
return <span className="px-1.5 py-2.5 text-light-slate">No value</span>;
3751
}
3852

53+
if (linkData && workspaceName) {
54+
return (
55+
<div className="comet-code size-full max-w-full overflow-hidden whitespace-pre-wrap break-words rounded-md border bg-code-block px-2 py-[11px]">
56+
<Link
57+
to="/$workspaceName/projects/$projectId/agent-configuration"
58+
params={{
59+
workspaceName,
60+
projectId: linkData.projectId,
61+
}}
62+
search={{ configId: linkData.blueprintId }}
63+
className="text-foreground underline hover:no-underline"
64+
>
65+
{data}
66+
</Link>
67+
</div>
68+
);
69+
}
70+
3971
return (
4072
<div className="comet-code size-full max-w-full overflow-hidden whitespace-pre-wrap break-words rounded-md border bg-code-block px-2 py-[11px]">
4173
{showDiffView ? (

apps/opik-frontend/src/v2/pages/CompareExperimentsPage/ConfigurationTab/ConfigurationTab.tsx

Lines changed: 37 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@ import isObject from "lodash/isObject";
66
import uniq from "lodash/uniq";
77
import toLower from "lodash/toLower";
88
import find from "lodash/find";
9+
import omit from "lodash/omit";
910
import { flattie } from "flattie";
1011

1112
import { COLUMN_TYPE, ColumnData } from "@/types/shared";
@@ -16,6 +17,7 @@ import CompareExperimentsActionsPanel from "@/v2/pages/CompareExperimentsPage/Co
1617
import CompareExperimentsConfigCell, {
1718
CompareConfig,
1819
CompareFiledValue,
20+
AgentConfigLinkData,
1921
} from "@/v2/pages-shared/experiments/CompareExperimentsConfigCell/CompareExperimentsConfigCell";
2022
import PageBodyStickyContainer from "@/shared/PageBodyStickyContainer/PageBodyStickyContainer";
2123
import PageBodyStickyTableWrapper from "@/v2/layout/PageBodyStickyTableWrapper/PageBodyStickyTableWrapper";
@@ -28,6 +30,8 @@ import { Switch } from "@/ui/switch";
2830
import { Label } from "@/ui/label";
2931
import { Separator } from "@/ui/separator";
3032
import { EXPLAINER_ID, EXPLAINERS_MAP } from "@/constants/explainers";
33+
import { AGENT_CONFIGURATION_METADATA_KEY } from "@/utils/agent-configurations";
34+
import { isAgentConfigurationMetadata } from "@/v2/pages-shared/traces/TraceDetailsPanel/TraceDataViewer/AgentConfigurationTab";
3135

3236
const COLUMNS_WIDTH_KEY = "compare-experiments-config-columns-width";
3337

@@ -71,6 +75,21 @@ const ConfigurationTab: React.FunctionComponent<ConfigurationTabProps> = ({
7175
defaultValue: {},
7276
});
7377

78+
const agentConfigLinkData = useMemo(() => {
79+
const result: Record<string, AgentConfigLinkData> = {};
80+
for (const exp of experiments) {
81+
const meta = exp.metadata as Record<string, unknown> | undefined;
82+
const config = meta?.[AGENT_CONFIGURATION_METADATA_KEY];
83+
if (isAgentConfigurationMetadata(config) && exp.project_id) {
84+
result[exp.id] = {
85+
projectId: exp.project_id,
86+
blueprintId: config._blueprint_id,
87+
};
88+
}
89+
}
90+
return result;
91+
}, [experiments]);
92+
7493
const columns = useMemo(() => {
7594
const retVal = convertColumnDataToColumn<CompareConfig, CompareConfig>(
7695
DEFAULT_COLUMNS,
@@ -86,6 +105,7 @@ const ConfigurationTab: React.FunctionComponent<ConfigurationTabProps> = ({
86105
custom: {
87106
onlyDiff,
88107
experiment: find(experiments, (e) => e.id === id),
108+
agentConfigLinkData,
89109
},
90110
},
91111
size: 400,
@@ -94,16 +114,30 @@ const ConfigurationTab: React.FunctionComponent<ConfigurationTabProps> = ({
94114
});
95115

96116
return retVal;
97-
}, [experimentsIds, onlyDiff, experiments]);
117+
}, [experimentsIds, onlyDiff, experiments, agentConfigLinkData]);
98118

99119
const flattenExperimentMetadataMap = useMemo(() => {
100120
return experiments.reduce<
101121
Record<string, Record<string, CompareFiledValue>>
102122
>((acc, experiment) => {
103-
acc[experiment.id] = isObject(experiment.metadata)
104-
? flattie(experiment.metadata, ".", true)
123+
const rawMetadata = experiment.metadata as
124+
| Record<string, unknown>
125+
| undefined;
126+
127+
const agentConfig = rawMetadata?.[AGENT_CONFIGURATION_METADATA_KEY];
128+
const metadata = isObject(rawMetadata)
129+
? omit(rawMetadata, AGENT_CONFIGURATION_METADATA_KEY)
105130
: {};
106131

132+
acc[experiment.id] = isObject(metadata)
133+
? flattie(metadata, ".", true)
134+
: {};
135+
136+
if (isAgentConfigurationMetadata(agentConfig)) {
137+
acc[experiment.id]["agent_configuration"] =
138+
agentConfig.blueprint_version ?? agentConfig._blueprint_id;
139+
}
140+
107141
return acc;
108142
}, {});
109143
}, [experiments]);

sdks/python/src/opik/api_objects/dataset/test_suite/test_suite.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -623,6 +623,7 @@ def __internal_api__run_optimization_suite__(
623623
client: Optional["opik_client_module.Opik"] = None,
624624
generate_report: bool = True,
625625
report_output_path: Optional[str] = None,
626+
blueprint_id: Optional[str] = None,
626627
) -> suite_types.TestSuiteResult:
627628
"""
628629
Internal entry point used by the optimizer framework.
@@ -648,4 +649,5 @@ def __internal_api__run_optimization_suite__(
648649
experiment_type=experiment_type,
649650
generate_report=generate_report,
650651
report_output_path=report_output_path,
652+
blueprint_id=blueprint_id,
651653
)

sdks/python/src/opik/evaluation/evaluator.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,26 @@
5050
EVALUATION_STREAM_DATASET_BATCH_SIZE = 200
5151

5252

53+
def _merge_blueprint_into_config(
54+
client: "opik_client.Opik",
55+
blueprint_id: str,
56+
experiment_config: Optional[Dict[str, Any]],
57+
) -> Dict[str, Any]:
58+
"""Add blueprint reference to experiment_config under ``agent_configuration``."""
59+
experiment_config = dict(experiment_config) if experiment_config else {}
60+
agent_config: Dict[str, str] = {"_blueprint_id": blueprint_id}
61+
try:
62+
blueprint = client._rest_client.agent_configs.get_blueprint_by_id(
63+
blueprint_id=blueprint_id,
64+
)
65+
if blueprint.name:
66+
agent_config["blueprint_version"] = blueprint.name
67+
except Exception:
68+
LOGGER.debug("Failed to fetch blueprint %s", blueprint_id, exc_info=True)
69+
experiment_config["agent_configuration"] = agent_config
70+
return experiment_config
71+
72+
5373
def _calculate_total_items(
5474
dataset_: Union[dataset.Dataset, dataset.DatasetVersion],
5575
nb_samples: Optional[int],
@@ -170,6 +190,7 @@ def evaluate(
170190
experiment_scoring_functions: Optional[List[ExperimentScoreFunction]] = None,
171191
experiment_tags: Optional[List[str]] = None,
172192
dataset_filter_string: Optional[str] = None,
193+
blueprint_id: Optional[str] = None,
173194
) -> evaluation_result.EvaluationResult:
174195
"""
175196
Performs task evaluation on a given dataset. You can use either `scoring_metrics` or `scorer_functions` to calculate
@@ -271,6 +292,13 @@ def evaluate(
271292

272293
client = opik_client.get_global_client()
273294

295+
if blueprint_id:
296+
experiment_config = _merge_blueprint_into_config(
297+
client,
298+
blueprint_id,
299+
experiment_config,
300+
)
301+
274302
experiment_name = _use_or_create_experiment_name(
275303
experiment_name=experiment_name,
276304
experiment_name_prefix=experiment_name_prefix,
@@ -333,6 +361,7 @@ def __internal_api__run_test_suite__(
333361
experiment_type: Optional[str] = None,
334362
generate_report: bool = True,
335363
report_output_path: Optional[str] = None,
364+
blueprint_id: Optional[str] = None,
336365
) -> "suite_types.TestSuiteResult":
337366
"""
338367
Internal function that runs the full test suite evaluation pipeline:
@@ -352,6 +381,13 @@ def __internal_api__run_test_suite__(
352381
if client is None:
353382
client = opik_client.get_global_client()
354383

384+
if blueprint_id:
385+
experiment_config = _merge_blueprint_into_config(
386+
client,
387+
blueprint_id,
388+
experiment_config,
389+
)
390+
355391
@functools.wraps(task)
356392
def _validated_task(data: Dict[str, Any]) -> Any:
357393
return validate_task_result(task(data), input_data=data)
@@ -445,6 +481,7 @@ def run_tests(
445481
model: Optional[str] = None,
446482
generate_report: bool = True,
447483
report_output_path: Optional[str] = None,
484+
blueprint_id: Optional[str] = None,
448485
) -> "suite_types.TestSuiteResult":
449486
"""
450487
Run a test suite against a task function.
@@ -505,6 +542,7 @@ def run_tests(
505542
evaluator_model=model,
506543
generate_report=generate_report,
507544
report_output_path=report_output_path,
545+
blueprint_id=blueprint_id,
508546
)
509547

510548

sdks/python/tests/unit/evaluation/test_evaluate.py

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3538,3 +3538,44 @@ def say_task(item: Dict[str, Any]):
35383538
desc=mock.ANY,
35393539
total=mock.ANY,
35403540
)
3541+
3542+
3543+
class TestMergeBlueprintIntoConfig:
3544+
@staticmethod
3545+
def _make_blueprint(id, name):
3546+
bp = mock.MagicMock()
3547+
bp.id = id
3548+
bp.name = name
3549+
return bp
3550+
3551+
def test_blueprint_fetched_and_version_stored(self):
3552+
mock_client = mock.Mock()
3553+
mock_client._rest_client.agent_configs.get_blueprint_by_id.return_value = (
3554+
self._make_blueprint("bp-123", "v9")
3555+
)
3556+
3557+
result = evaluator_module._merge_blueprint_into_config(
3558+
mock_client,
3559+
"bp-123",
3560+
{"model": "gpt-4o"},
3561+
)
3562+
3563+
assert result["model"] == "gpt-4o"
3564+
assert result["agent_configuration"] == {
3565+
"_blueprint_id": "bp-123",
3566+
"blueprint_version": "v9",
3567+
}
3568+
3569+
def test_blueprint_fetch_fails_still_stores_id(self):
3570+
mock_client = mock.Mock()
3571+
mock_client._rest_client.agent_configs.get_blueprint_by_id.side_effect = (
3572+
Exception("not found")
3573+
)
3574+
3575+
result = evaluator_module._merge_blueprint_into_config(
3576+
mock_client,
3577+
"bp-456",
3578+
None,
3579+
)
3580+
3581+
assert result["agent_configuration"] == {"_blueprint_id": "bp-456"}

sdks/typescript/src/opik/evaluation/evaluate.ts

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,9 @@ export interface EvaluateOptions<T = Record<string, unknown>> {
5555

5656
/** Optional list of tags to associate with the experiment */
5757
tags?: string[];
58+
59+
/** Optional agent configuration blueprint ID to link with the experiment */
60+
blueprintId?: string;
5861
}
5962

6063
export async function evaluate<T = Record<string, unknown>>(
@@ -72,14 +75,27 @@ export async function evaluate<T = Record<string, unknown>>(
7275
// Get Opik client
7376
const client = options.client ?? OpikSingleton.getInstance();
7477

78+
// Resolve agent config blueprint if provided
79+
let experimentConfig = options.experimentConfig;
80+
if (options.blueprintId) {
81+
const agentConfig: Record<string, string> = { _blueprint_id: options.blueprintId };
82+
try {
83+
const blueprint = await client.api.agentConfigs.getBlueprintById(options.blueprintId);
84+
if (blueprint.name) agentConfig.blueprint_version = blueprint.name;
85+
} catch (error) {
86+
logger.debug(`Failed to fetch blueprint ${options.blueprintId}: ${error}`);
87+
}
88+
experimentConfig = { ...experimentConfig, agent_configuration: agentConfig };
89+
}
90+
7591
// Get version info for experiment linking
7692
const versionInfo = await options.dataset.getVersionInfo();
7793

7894
// Create experiment for this evaluation run
7995
const experiment = await client.createExperiment({
8096
name: options.experimentName,
8197
datasetName: options.dataset.name,
82-
experimentConfig: options.experimentConfig,
98+
experimentConfig,
8399
prompts: options.prompts,
84100
datasetVersionId: versionInfo?.id,
85101
tags: options.tags,

0 commit comments

Comments
 (0)