Skip to content
150 changes: 104 additions & 46 deletions src/app/src/pages/eval/components/EvalOutputCell.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,20 @@ import { Tooltip, TooltipContent, TooltipTrigger } from '@app/components/ui/tool
import useCloudConfig from '@app/hooks/useCloudConfig';
import { useEvalOperations } from '@app/hooks/useEvalOperations';
import { useShiftKey } from '@app/hooks/useShiftKey';
import { fetchCellDetail } from '@app/utils/api';
import {
normalizeMediaText,
resolveAudioSource,
resolveImageSource,
resolveVideoSource,
} from '@app/utils/media';
import { getActualPrompt } from '@app/utils/providerResponse';
import { type EvaluateTableOutput, ResultFailureReason } from '@promptfoo/types';
import {
type EvaluateTableOutput,
type ProviderResponse,
ResultFailureReason,
type Vars,
} from '@promptfoo/types';
import { diffJson, diffSentences, diffWords } from 'diff';
import {
Check,
Expand Down Expand Up @@ -96,6 +102,7 @@ export interface EvalOutputCellProps {
evaluationId?: string;
testCaseId?: string;
isRedteam?: boolean;
testVars?: Vars;
}

/**
Expand Down Expand Up @@ -130,6 +137,7 @@ function EvalOutputCell({
evaluationId,
testCaseId,
isRedteam,
testVars,
}: EvalOutputCellProps & {
firstOutput: EvaluateTableOutput;
showDiffs: boolean;
Expand All @@ -151,19 +159,70 @@ function EvalOutputCell({
const { replayEvaluation, fetchTraces } = useEvalOperations();

const [openPrompt, setOpen] = React.useState(false);
const [cellDetail, setCellDetail] = React.useState<{
prompt?: string;
response?: Record<string, unknown>;
testCase?: Record<string, unknown>;
} | null>(null);
const [loadingDetail, setLoadingDetail] = React.useState(false);
const [activeRating, setActiveRating] = React.useState<boolean | null>(
getHumanRating(output)?.pass ?? null,
);

// Update activeRating when output changes
// Update activeRating and reset lazy-loaded detail when the cell is reused
React.useEffect(() => {
const humanRating = getHumanRating(output)?.pass;
setActiveRating(humanRating ?? null);
setCellDetail(null);
setLoadingDetail(false);
}, [output]);

const handlePromptOpen = () => {
// The cell's evalId (from ...result spread, preserved through trimming) is needed
// for the detail endpoint, especially in comparison mode where cells from different
// evals share the same table. Falls back to the page-level evaluationId.
const cellEvalId = (output as unknown as Record<string, unknown>).evalId as string | undefined;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like output's type should be fixed upstream

const detailEvalId = cellEvalId || evaluationId || '';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When will cellEvalId and evaluationId differ?


// Auto-fetch prompt when "Show Prompts" is toggled on and prompt was stripped.
// Uses cleanup function to cancel stale responses during rapid cell/toggle changes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you consider instead disabling toggling while the request is in-flight?

React.useEffect(() => {
  // Only fetch when prompts are being shown, the prompt is absent from the table
  // payload, nothing has been fetched for this cell yet, and both ids required by
  // the detail endpoint are available.
  if (!showPrompts || cellDetail || output.prompt || !output.id || !detailEvalId) {
    return;
  }

  // `loadingDetail` is intentionally neither read here nor listed as a dependency:
  // this effect sets it, so depending on it would re-run the effect immediately,
  // and the cleanup below would cancel the request that was just started.
  let cancelled = false;
  setLoadingDetail(true);
  // fetchCellDetail resolves to null on failure instead of rejecting, so no
  // rejection handler is needed; a null result simply leaves cellDetail unset.
  void fetchCellDetail(detailEvalId, output.id).then((detail) => {
    if (cancelled) {
      return;
    }
    if (detail) {
      setCellDetail(detail);
    }
    setLoadingDetail(false);
  });

  return () => {
    // Ignore a stale response and clear the spinner so that a cancelled request
    // can never leave the cell stuck in the loading state.
    cancelled = true;
    setLoadingDetail(false);
  };
}, [showPrompts, cellDetail, output.prompt, output.id, detailEvalId]);
Comment on lines +186 to +211
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# Check if the file exists and read the specific lines
wc -l src/app/src/pages/eval/components/EvalOutputCell.tsx

Repository: promptfoo/promptfoo

Length of output: 120


🏁 Script executed:

# Read the context around lines 186-211
sed -n '180,220p' src/app/src/pages/eval/components/EvalOutputCell.tsx

Repository: promptfoo/promptfoo

Length of output: 1624


🏁 Script executed:

# Find the fetchCellDetail function to understand its signature
rg -n "fetchCellDetail" src/app/src/pages/eval/components/EvalOutputCell.tsx -A 2 -B 2

Repository: promptfoo/promptfoo

Length of output: 714


🏁 Script executed:

# Search for fetchCellDetail definition in the codebase
rg -n "const fetchCellDetail|function fetchCellDetail|export.*fetchCellDetail" --type ts --type tsx

Repository: promptfoo/promptfoo

Length of output: 90


🏁 Script executed:

# Search for fetchCellDetail definition more broadly
rg "export.*fetchCellDetail|const fetchCellDetail\s*=" --type ts -A 5

Repository: promptfoo/promptfoo

Length of output: 419


🏁 Script executed:

# Also check the api utils file directly
fd -t f "api.ts" src/app/src/utils/

Repository: promptfoo/promptfoo

Length of output: 87


🏁 Script executed:

# Read the api utility file
cat -n src/app/src/utils/api.ts | head -150

Repository: promptfoo/promptfoo

Length of output: 3063


🏁 Script executed:

# Search for fetchCellDetail in api.ts
rg -n "fetchCellDetail" src/app/src/utils/api.ts -A 10 -B 2

Repository: promptfoo/promptfoo

Length of output: 400


Reset loadingDetail in cleanup to prevent stuck loading state.

When the effect cleanup runs before the async request resolves (e.g., showPrompts toggles off), the if (!cancelled) guard prevents setLoadingDetail(false) from executing. This leaves the dialog in a perpetual loading state and blocks subsequent fetch attempts. Reset loadingDetail in the cleanup function and use async/await for consistency with coding guidelines.

🛠️ Suggested fix
 React.useEffect(() => {
-  if (
-    showPrompts &&
-    !cellDetail &&
-    !loadingDetail &&
-    !output.prompt &&
-    output.id &&
-    detailEvalId
-  ) {
-    let cancelled = false;
-    setLoadingDetail(true);
-    fetchCellDetail(detailEvalId, output.id).then((detail) => {
-      if (!cancelled) {
-        if (detail) {
-          setCellDetail(detail);
-        }
-        setLoadingDetail(false);
-      }
-    });
-    return () => {
-      cancelled = true;
-    };
-  }
+  if (
+    !showPrompts ||
+    cellDetail ||
+    loadingDetail ||
+    output.prompt ||
+    !output.id ||
+    !detailEvalId
+  ) {
+    return;
+  }
+
+  let cancelled = false;
+  const loadDetail = async () => {
+    setLoadingDetail(true);
+    try {
+      const detail = await fetchCellDetail(detailEvalId, output.id);
+      if (!cancelled && detail) {
+        setCellDetail(detail);
+      }
+    } finally {
+      if (!cancelled) {
+        setLoadingDetail(false);
+      }
+    }
+  };
+  loadDetail();
+
+  return () => {
+    cancelled = true;
+    setLoadingDetail(false);
+  };
 }, [showPrompts, cellDetail, loadingDetail, output.prompt, output.id, detailEvalId]);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// Auto-fetch prompt when "Show Prompts" is toggled on and prompt was stripped.
// Uses cleanup function to cancel stale responses during rapid cell/toggle changes.
React.useEffect(() => {
if (
showPrompts &&
!cellDetail &&
!loadingDetail &&
!output.prompt &&
output.id &&
detailEvalId
) {
let cancelled = false;
setLoadingDetail(true);
fetchCellDetail(detailEvalId, output.id).then((detail) => {
if (!cancelled) {
if (detail) {
setCellDetail(detail);
}
setLoadingDetail(false);
}
});
return () => {
cancelled = true;
};
}
}, [showPrompts, cellDetail, loadingDetail, output.prompt, output.id, detailEvalId]);
// Auto-fetch prompt when "Show Prompts" is toggled on and prompt was stripped.
// Uses cleanup function to cancel stale responses during rapid cell/toggle changes.
React.useEffect(() => {
if (
!showPrompts ||
cellDetail ||
loadingDetail ||
output.prompt ||
!output.id ||
!detailEvalId
) {
return;
}
let cancelled = false;
const loadDetail = async () => {
setLoadingDetail(true);
try {
const detail = await fetchCellDetail(detailEvalId, output.id);
if (!cancelled && detail) {
setCellDetail(detail);
}
} finally {
if (!cancelled) {
setLoadingDetail(false);
}
}
};
loadDetail();
return () => {
cancelled = true;
setLoadingDetail(false);
};
}, [showPrompts, cellDetail, loadingDetail, output.prompt, output.id, detailEvalId]);
🤖 Prompt for AI Agents
In `@src/app/src/pages/eval/components/EvalOutputCell.tsx` around lines 186 - 211,
The useEffect that auto-fetches prompt details (React.useEffect) can leave
loadingDetail stuck because the cleanup only sets cancelled=true and the async
.then never runs setLoadingDetail(false); update the effect to use async/await
inside an inner async function called (e.g., fetchDetailAsync) that awaits
fetchCellDetail(detailEvalId, output.id), and in the cleanup always call
setLoadingDetail(false) as well as flip the cancelled flag; ensure you still
only call setCellDetail(detail) when not cancelled and that the effect
dependencies remain [showPrompts, cellDetail, loadingDetail, output.prompt,
output.id, detailEvalId].


const handlePromptOpen = useCallback(async () => {
setOpen(true);
};
// Lazy-load the full prompt if not already fetched.
// The table endpoint strips prompt content to reduce payload size.
if (!cellDetail && !loadingDetail && detailEvalId && output.id) {
setLoadingDetail(true);
const detail = await fetchCellDetail(detailEvalId, output.id);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be wrapped in try/catch w/ an error state?

if (detail) {
setCellDetail(detail);
}
setLoadingDetail(false);
}
}, [cellDetail, loadingDetail, detailEvalId, output.id]);
const handlePromptClose = () => {
setOpen(false);
};
Expand Down Expand Up @@ -346,7 +405,7 @@ function EvalOutputCell({
node = (
<img
src={src}
alt={output.prompt}
alt={cellDetail?.prompt || output.text || 'Generated image'}
style={{ width: '100%' }}
onClick={() => toggleLightbox(src)}
/>
Expand Down Expand Up @@ -841,44 +900,43 @@ function EvalOutputCell({
</TooltipTrigger>
<TooltipContent>Edit comment</TooltipContent>
</Tooltip>
{output.prompt && (
<>
<Tooltip disableHoverableContent>
<TooltipTrigger asChild>
<button
type="button"
className="action p-1 rounded hover:bg-muted transition-colors"
onClick={handlePromptOpen}
aria-label="View output and test details"
>
<Search className="size-4" />
</button>
</TooltipTrigger>
<TooltipContent>View output and test details</TooltipContent>
</Tooltip>
{openPrompt && (
<EvalOutputPromptDialog
open={openPrompt}
onClose={handlePromptClose}
prompt={output.prompt}
provider={output.provider}
gradingResults={output.gradingResult?.componentResults}
output={text}
metadata={output.metadata}
providerPrompt={getActualPrompt(output.response, { formatted: true })}
evaluationId={evaluationId}
testCaseId={testCaseId || output.id}
testIndex={rowIndex}
promptIndex={promptIndex}
variables={output.metadata?.inputVars || output.testCase?.vars}
onAddFilter={addFilter}
onResetFilters={resetFilters}
onReplay={replayEvaluation}
fetchTraces={fetchTraces}
cloudConfig={cloudConfig}
/>
<Tooltip disableHoverableContent>
<TooltipTrigger asChild>
<button
type="button"
className="action p-1 rounded hover:bg-muted transition-colors"
onClick={handlePromptOpen}
aria-label="View output and test details"
>
<Search className="size-4" />
</button>
</TooltipTrigger>
<TooltipContent>View output and test details</TooltipContent>
</Tooltip>
{openPrompt && (
<EvalOutputPromptDialog
open={openPrompt}
onClose={handlePromptClose}
prompt={cellDetail?.prompt || output.prompt || (loadingDetail ? 'Loading...' : '')}
provider={output.provider}
gradingResults={output.gradingResult?.componentResults}
output={text}
metadata={output.metadata}
providerPrompt={getActualPrompt(
(cellDetail?.response as ProviderResponse | undefined) || output.response,
{ formatted: true },
)}
</>
evaluationId={evaluationId}
testCaseId={testCaseId || output.id}
testIndex={rowIndex}
promptIndex={promptIndex}
variables={output.metadata?.inputVars || testVars}
onAddFilter={addFilter}
onResetFilters={resetFilters}
onReplay={replayEvaluation}
fetchTraces={fetchTraces}
cloudConfig={cloudConfig}
/>
)}
</div>
);
Expand Down Expand Up @@ -911,12 +969,12 @@ function EvalOutputCell({
)}
</div>
)}
{showPrompts && firstOutput.prompt && (
{showPrompts && (cellDetail?.prompt || output.prompt) && (
<div className="prompt">
<span className="pill">Prompt</span>
{typeof output.prompt === 'string'
? output.prompt
: JSON.stringify(output.prompt, null, 2)}
{typeof (cellDetail?.prompt || output.prompt) === 'string'
? cellDetail?.prompt || output.prompt
: JSON.stringify(cellDetail?.prompt || output.prompt, null, 2)}
</div>
)}
{/* Show response audio from redteam history if available (target's audio response) */}
Expand Down
1 change: 1 addition & 0 deletions src/app/src/pages/eval/components/ResultsTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,7 @@ function ResultsTable({
evaluationId={evalId || undefined}
testCaseId={info.row.original.test?.metadata?.testCaseId || output.id}
isRedteam={isRedteam}
testVars={info.row.original.test?.vars}
/>
</ErrorBoundary>
) : (
Expand Down
21 changes: 21 additions & 0 deletions src/app/src/utils/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ export async function fetchUserId(): Promise<string | null> {
}
}

export interface CellDetailResponse {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DRY with cellDetail useState type above.

prompt: string;
response?: Record<string, unknown>;
testCase?: Record<string, unknown>;
}

export async function fetchCellDetail(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document this function

evalId: string,
resultId: string,
): Promise<CellDetailResponse | null> {
try {
const response = await callApi(`/eval/${evalId}/results/${resultId}/detail`);
if (!response.ok) {
return null;
}
return response.json();
} catch {
return null;
}
}

export async function updateEvalAuthor(
evalId: string,
author: string,
Expand Down
39 changes: 25 additions & 14 deletions src/commands/export.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,31 @@ export function exportCommand(program: Command) {

logger.info(`Eval with ID ${evalId} has been successfully exported to ${cmdObj.output}.`);
} else {
const summary = await result.toEvaluateSummary();
const metadata = createOutputMetadata(result);
const jsonData = JSON.stringify(
{
evalId: result.id,
results: summary,
config: result.config,
shareableUrl: null,
metadata,
},
null,
2,
);
logger.info(jsonData);
// No output file was requested: serialize the eval (summary, config and output
// metadata) as pretty-printed JSON and emit it through the logger.
try {
const summary = await result.toEvaluateSummary();
const metadata = createOutputMetadata(result);
const jsonData = JSON.stringify(
{
evalId: result.id,
results: summary,
config: result.config,
shareableUrl: null,
metadata,
},
null,
2,
);
logger.info(jsonData);
} catch (error) {
// A RangeError raised while building or printing the JSON is treated as
// "eval too large for the console" (presumably the engine's string-length
// limit — TODO confirm). Point the user at file export instead of crashing.
if (error instanceof RangeError) {
logger.error(
`Eval too large to output to console. Use -o to export to a file instead:\n\n promptfoo export eval ${evalId} -o output.jsonl\n`,
);
// Signal failure via the exit code and leave the enclosing function early,
// skipping whatever follows this branch.
process.exitCode = 1;
return;
}
// Anything else is unexpected here; let it propagate to the caller.
throw error;
}
}

telemetry.record('command_used', {
Expand Down
36 changes: 35 additions & 1 deletion src/server/routes/eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import Eval, { EvalQueries } from '../../models/eval';
import EvalResult from '../../models/evalResult';
import { EvalSchemas, EvalTableQuerySchema } from '../../types/api/eval';
import { deleteEval, deleteEvals, updateResult, writeResultsToDatabase } from '../../util/database';
import { trimTableCellForApi } from '../../util/exportToFile';
import invariant from '../../util/invariant';
import { setDownloadHeaders } from '../utils/downloadHelpers';
import {
Expand Down Expand Up @@ -324,20 +325,53 @@ evalRouter.get('/:id/table', async (req: Request, res: Response): Promise<void>
}
}

// Trim cell data for the API response to prevent massive payloads.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"massive" is less useful than a precise definition

// Each cell's prompt (rendered template with base64 images) is stripped — the frontend
// fetches it on demand via GET /:evalId/results/:resultId/detail.
// Export/download formats return full data (they return early above).
for (const row of returnTable.body) {
row.outputs = row.outputs.map((output) => (output ? trimTableCellForApi(output) : output));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How's the performance on this?

}

// Strip config.tests — unused by the frontend from this endpoint and potentially large.
const { tests: _tests, ...configWithoutTests } = eval_.config;

// Default response for table view
res.json({
table: returnTable,
totalCount: table.totalCount,
filteredCount: table.filteredCount,
filteredMetrics,
config: eval_.config,
config: configWithoutTests,
author: eval_.author || null,
version: eval_.version(),
id,
stats: eval_.getStats(),
} as EvalTableDTO);
});

// Returns the full prompt, provider response, and testCase for a single result cell.
// The table endpoint strips the prompt to keep payloads small; the frontend fetches
// it on demand when the user opens a cell's details dialog or enables "Show Prompts".
evalRouter.get(
'/:evalId/results/:resultId/detail',
/**
 * Handler for GET /:evalId/results/:resultId/detail.
 *
 * Responds with `{ prompt, response, testCase }` for one result cell, or:
 * - 400 `{ error }` when the route params fail schema validation,
 * - 404 `{ error }` when the result does not exist or belongs to another eval,
 * - 500 `{ error }` when the lookup itself throws.
 */
async (req: Request, res: Response): Promise<void> => {
  // safeParse instead of parse: a validation failure becomes a 400 response
  // rather than an exception thrown inside an async route handler.
  const params = EvalSchemas.ResultDetail.Params.safeParse(req.params);
  if (!params.success) {
    res.status(400).json({ error: params.error.message });
    return;
  }
  const { evalId, resultId } = params.data;

  try {
    const result = await EvalResult.findById(resultId);
    // Also reject results that exist but belong to a different eval, so a result
    // id cannot be read through an unrelated eval's URL.
    if (!result || result.evalId !== evalId) {
      res.status(404).json({ error: 'Result not found' });
      return;
    }

    res.json({
      prompt: result.prompt.raw,
      response: result.response,
      testCase: result.testCase,
    });
  } catch (error) {
    // Always answer the request; otherwise a rejected lookup would surface only
    // as an unhandled rejection and the client would be left waiting.
    const message = error instanceof Error ? error.message : String(error);
    res.status(500).json({ error: `Failed to fetch result detail: ${message}` });
  }
},
Comment on lines +353 to +372
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Wrap the result-detail handler with try/catch and {success,data/error} responses.

Uncaught Zod parse or DB errors will bubble, and the response shape doesn’t match the API contract.

🛠 Suggested fix
evalRouter.get(
  '/:evalId/results/:resultId/detail',
  async (req: Request, res: Response): Promise<void> => {
-    const { evalId, resultId } = EvalSchemas.ResultDetail.Params.parse(req.params);
-
-    const result = await EvalResult.findById(resultId);
-    if (!result || result.evalId !== evalId) {
-      res.status(404).json({ error: 'Result not found' });
-      return;
-    }
-
-    res.json({
-      prompt: result.prompt.raw,
-      response: result.response,
-      testCase: result.testCase,
-    });
+    try {
+      const { evalId, resultId } = EvalSchemas.ResultDetail.Params.parse(req.params);
+      const result = await EvalResult.findById(resultId);
+      if (!result || result.evalId !== evalId) {
+        res.status(404).json({ success: false, error: 'Result not found' });
+        return;
+      }
+
+      res.json({
+        success: true,
+        data: {
+          prompt: result.prompt.raw,
+          response: result.response,
+          testCase: result.testCase,
+        },
+      });
+    } catch (error) {
+      if (error instanceof z.ZodError) {
+        res.status(400).json({ success: false, error: z.prettifyError(error) });
+        return;
+      }
+      logger.error('[GET /:evalId/results/:resultId/detail] Failed to fetch result detail', {
+        error,
+        evalId: req.params.evalId,
+        resultId: req.params.resultId,
+      });
+      res.status(500).json({ success: false, error: 'Failed to fetch result detail' });
+    }
  },
);

As per coding guidelines, "src/server/routes/**/*.{ts,tsx}: Validate requests with Zod schemas from src/types/api/, wrap all responses in { success, data/error } format, handle errors with try-catch blocks in async route handlers."

🤖 Prompt for AI Agents
In `@src/server/routes/eval.ts` around lines 353 - 372, Wrap the async route
handler for evalRouter.get('/:evalId/results/:resultId/detail') in a try/catch;
validate params using EvalSchemas.ResultDetail.Params.parse as currently done,
but if parse throws return res.status(400).json({ success: false, error:
'<validation message>' }); after DB lookup (EvalResult.findById) return 404 via
res.status(404).json({ success: false, error: 'Result not found' }) when missing
or mismatched, and on success return res.json({ success: true, data: { prompt:
result.prompt.raw, response: result.response, testCase: result.testCase } });
catch any other errors and return res.status(500).json({ success: false, error:
String(err) }) to ensure all responses follow the {success, data/error} contract
and Zod/DB errors are handled.

);

evalRouter.get('/:id/metadata-keys', async (req: Request, res: Response): Promise<void> => {
try {
const { id } = EvalSchemas.MetadataKeys.Params.parse(req.params);
Expand Down
12 changes: 12 additions & 0 deletions src/types/api/eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,15 @@ export const EvalTableQuerySchema = z.object({

export type EvalTableQuery = z.infer<typeof EvalTableQuerySchema>;

// GET /api/eval/:evalId/results/:resultId/detail

/** A route id segment: any non-empty string. */
const resultDetailIdSchema = z.string().min(1);

/**
 * Path parameters of the result-detail route: the eval that owns the result
 * and the result itself. Both must be non-empty strings.
 */
export const ResultDetailParamsSchema = z.object({
  evalId: resultDetailIdSchema,
  resultId: resultDetailIdSchema,
});

/** Parsed shape of {@link ResultDetailParamsSchema}. */
export type ResultDetailParams = z.infer<typeof ResultDetailParamsSchema>;

/** Grouped schemas for server-side validation. */
export const EvalSchemas = {
UpdateAuthor: {
Expand All @@ -123,4 +132,7 @@ export const EvalSchemas = {
Table: {
Query: EvalTableQuerySchema,
},
ResultDetail: {
Params: ResultDetailParamsSchema,
},
} as const;
Loading
Loading