diff --git a/src/cli-core.ts b/src/cli-core.ts index 4fa113f6..ed057f6a 100644 --- a/src/cli-core.ts +++ b/src/cli-core.ts @@ -38,6 +38,7 @@ import { type NormalizedOutputError, } from "./error-normalization.js"; import { flushPerfMetricsCapture, installPerfMetricsCapture } from "./perf-metrics-capture.js"; +import { mergePromptSourceWithText, parsePromptSource, textPrompt } from "./prompt-content.js"; import { runQueueOwnerFromEnv } from "./queue-owner-env.js"; import { DEFAULT_HISTORY_LIMIT, @@ -88,17 +89,14 @@ async function readPrompt( promptParts: string[], filePath: string | undefined, cwd: string, -): Promise { +): Promise { if (filePath) { const source = filePath === "-" ? await readPromptInputFromStdin() : await fs.readFile(path.resolve(cwd, filePath), "utf8"); - const pieces = [source.trim(), promptParts.join(" ").trim()].filter( - (value) => value.length > 0, - ); - const prompt = pieces.join("\n\n").trim(); - if (!prompt) { + const prompt = mergePromptSourceWithText(source, promptParts.join(" ")); + if (prompt.length === 0) { throw new InvalidArgumentError("Prompt from --file is empty"); } return prompt; @@ -106,7 +104,7 @@ async function readPrompt( const joined = promptParts.join(" ").trim(); if (joined.length > 0) { - return joined; + return textPrompt(joined); } if (process.stdin.isTTY) { @@ -115,8 +113,8 @@ async function readPrompt( ); } - const prompt = (await readPromptInputFromStdin()).trim(); - if (!prompt) { + const prompt = parsePromptSource(await readPromptInputFromStdin()); + if (prompt.length === 0) { throw new InvalidArgumentError("Prompt from stdin is empty"); } @@ -251,7 +249,7 @@ async function handlePrompt( await printPromptSessionBanner(record, agent.cwd, outputPolicy.format, outputPolicy.jsonStrict); const result = await sendSession({ sessionId: record.acpxRecordId, - message: prompt, + prompt, mcpServers: config.mcpServers, permissionMode, nonInteractivePermissions: globalFlags.nonInteractivePermissions, @@ -327,7 +325,7 @@ async function handleExec( const result = await runOnce({ agentCommand: agent.agentCommand, cwd: agent.cwd, - message: prompt, + prompt, mcpServers: config.mcpServers, permissionMode, nonInteractivePermissions: globalFlags.nonInteractivePermissions, diff --git a/src/client.ts b/src/client.ts index 50995ffd..22c41661 100644 --- a/src/client.ts +++ b/src/client.ts @@ -40,6 +40,7 @@ import { } from "./errors.js"; import { FileSystemHandlers } from "./filesystem.js"; import { classifyPermissionDecision, resolvePermissionRequest } from "./permissions.js"; +import { textPrompt } from "./prompt-content.js"; import { extractRuntimeSessionId } from "./runtime-session-id.js"; import { TimeoutError, withTimeout } from "./session-runtime-helpers.js"; import { TerminalManager } from "./terminal.js"; @@ -48,6 +49,7 @@ import type { NonInteractivePermissionPolicy, PermissionMode, PermissionStats, + PromptInput, } from "./types.js"; type CommandParts = { @@ -976,7 +978,7 @@ export class AcpClient { }; } - async prompt(sessionId: string, text: string): Promise { + async prompt(sessionId: string, prompt: PromptInput | string): Promise { const connection = this.getConnection(); const restoreConsoleError = this.options.suppressSdkConsoleErrors ? installSdkConsoleErrorSuppression() @@ -986,12 +988,7 @@ export class AcpClient { try { promptPromise = connection.prompt({ sessionId, - prompt: [ - { - type: "text", - text, - }, - ], + prompt: typeof prompt === "string" ? textPrompt(prompt) : prompt, }); } catch (error) { restoreConsoleError?.(); diff --git a/src/prompt-content.ts b/src/prompt-content.ts new file mode 100644 index 00000000..4dd60736 --- /dev/null +++ b/src/prompt-content.ts @@ -0,0 +1,130 @@ +import type { ContentBlock } from "@agentclientprotocol/sdk"; + +export type PromptInput = ContentBlock[]; + +function asRecord(value: unknown): Record | undefined { + if (!value || typeof value !== "object" || Array.isArray(value)) { + return undefined; + } + return value as Record; +} + +function isTextBlock(value: unknown): value is Extract { + const record = asRecord(value); + return record?.type === "text" && typeof record.text === "string"; +} + +function isImageBlock(value: unknown): value is Extract { + const record = asRecord(value); + return ( + record?.type === "image" && + typeof record.mimeType === "string" && + typeof record.data === "string" + ); +} + +function isResourceLinkBlock( + value: unknown, +): value is Extract { + const record = asRecord(value); + return ( + record?.type === "resource_link" && + typeof record.uri === "string" && + (record.title === undefined || typeof record.title === "string") && + (record.name === undefined || typeof record.name === "string") + ); +} + +function isResourcePayload(value: unknown): boolean { + const record = asRecord(value); + if (!record || typeof record.uri !== "string") { + return false; + } + return record.text === undefined || typeof record.text === "string"; +} + +function isResourceBlock(value: unknown): value is Extract { + const record = asRecord(value); + return record?.type === "resource" && isResourcePayload(record.resource); +} + +function isContentBlock(value: unknown): value is ContentBlock { + return ( + isTextBlock(value) || + isImageBlock(value) || + isResourceLinkBlock(value) || + isResourceBlock(value) + ); +} + +export function isPromptInput(value: unknown): value is PromptInput { + return Array.isArray(value) && value.every((entry) => isContentBlock(entry)); +} + +export function textPrompt(text: string): PromptInput { + return [ + { + type: "text", + text, + }, + ]; +} + +function parseStructuredPrompt(source: string): PromptInput | undefined { + if (!source.startsWith("[")) { + return undefined; + } + try { + const parsed = JSON.parse(source) as unknown; + return isPromptInput(parsed) ? parsed : undefined; + } catch { + return undefined; + } +} + +export function parsePromptSource(source: string): PromptInput { + const trimmed = source.trim(); + const structured = parseStructuredPrompt(trimmed); + if (structured) { + return structured; + } + if (!trimmed) { + return []; + } + return textPrompt(trimmed); +} + +export function mergePromptSourceWithText(source: string, suffixText: string): PromptInput { + const prompt = parsePromptSource(source); + const appended = suffixText.trim(); + if (!appended) { + return prompt; + } + if (prompt.length === 0) { + return textPrompt(appended); + } + return [...prompt, ...textPrompt(appended)]; +} + +export function promptToDisplayText(prompt: PromptInput): string { + return prompt + .map((block) => { + switch (block.type) { + case "text": + return block.text; + case "resource_link": + return block.title ?? block.name ?? block.uri; + case "resource": + return "text" in block.resource && typeof block.resource.text === "string" + ? block.resource.text + : block.resource.uri; + case "image": + return `[image] ${block.mimeType}`; + default: + return ""; + } + }) + .filter((entry) => entry.trim().length > 0) + .join("\n\n") + .trim(); +} diff --git a/src/queue-ipc-server.ts b/src/queue-ipc-server.ts index 5e9a3f58..a04b88cf 100644 --- a/src/queue-ipc-server.ts +++ b/src/queue-ipc-server.ts @@ -2,12 +2,13 @@ import net from "node:net"; import type { SetSessionConfigOptionResponse } from "@agentclientprotocol/sdk"; import { normalizeOutputError } from "./error-normalization.js"; import { recordPerfDuration } from "./perf-metrics.js"; +import { textPrompt } from "./prompt-content.js"; import { parseQueueRequest, type QueueOwnerErrorMessage, type QueueOwnerMessage, } from "./queue-messages.js"; -import type { NonInteractivePermissionPolicy, PermissionMode } from "./types.js"; +import type { NonInteractivePermissionPolicy, PermissionMode, PromptInput } from "./types.js"; type QueueOwnerSocketLease = { socketPath: string; @@ -71,6 +72,7 @@ function writeQueueMessage(socket: net.Socket, message: QueueOwnerMessage): void export type QueueTask = { requestId: string; message: string; + prompt: PromptInput; permissionMode: PermissionMode; nonInteractivePermissions?: NonInteractivePermissionPolicy; timeoutMs?: number; @@ -440,6 +442,7 @@ export class SessionQueueOwner { const task: QueueTask = { requestId: request.requestId, message: request.message, + prompt: request.prompt ?? textPrompt(request.message), permissionMode: request.permissionMode, nonInteractivePermissions: request.nonInteractivePermissions, timeoutMs: request.timeoutMs, diff --git a/src/queue-ipc.ts b/src/queue-ipc.ts index 2a986149..ac0a2fac 100644 --- a/src/queue-ipc.ts +++ b/src/queue-ipc.ts @@ -32,6 +32,7 @@ import type { OutputErrorEmissionPolicy, OutputFormatter, PermissionMode, + PromptInput, SessionEnqueueResult, SessionSendOutcome, } from "./types.js"; @@ -220,6 +221,7 @@ function assertOwnerGeneration( export type SubmitToQueueOwnerOptions = { sessionId: string; message: string; + prompt?: PromptInput; permissionMode: PermissionMode; nonInteractivePermissions?: NonInteractivePermissionPolicy; outputFormatter: OutputFormatter; @@ -246,6 +248,7 @@ async function submitToQueueOwner( requestId, ownerGeneration: owner.ownerGeneration, message: options.message, + prompt: options.prompt, permissionMode: options.permissionMode, nonInteractivePermissions: options.nonInteractivePermissions, timeoutMs: options.timeoutMs, diff --git a/src/queue-messages.ts b/src/queue-messages.ts index e9a86230..de779e6c 100644 --- a/src/queue-messages.ts +++ b/src/queue-messages.ts @@ -1,5 +1,6 @@ import type { SetSessionConfigOptionResponse } from "@agentclientprotocol/sdk"; import { isAcpJsonRpcMessage } from "./acp-jsonrpc.js"; +import { isPromptInput, textPrompt } from "./prompt-content.js"; import { OUTPUT_ERROR_CODES, OUTPUT_ERROR_ORIGINS, @@ -11,6 +12,7 @@ import type { AcpJsonRpcMessage, NonInteractivePermissionPolicy, PermissionMode, + PromptInput, SessionSendResult, } from "./types.js"; @@ -19,6 +21,7 @@ export type QueueSubmitRequest = { requestId: string; ownerGeneration?: number; message: string; + prompt?: PromptInput; permissionMode: PermissionMode; nonInteractivePermissions?: NonInteractivePermissionPolicy; timeoutMs?: number; @@ -204,9 +207,12 @@ export function parseQueueRequest(raw: unknown): QueueRequest | null { ? request.suppressSdkConsoleErrors : null; + const prompt = + request.prompt == null ? undefined : isPromptInput(request.prompt) ? request.prompt : null; if ( typeof request.message !== "string" || !isPermissionMode(request.permissionMode) || + prompt === null || nonInteractivePermissions === null || suppressSdkConsoleErrors === null || typeof request.waitForCompletion !== "boolean" @@ -219,6 +225,7 @@ export function parseQueueRequest(raw: unknown): QueueRequest | null { requestId: request.requestId, ownerGeneration, message: request.message, + prompt: prompt ?? textPrompt(request.message), permissionMode: request.permissionMode, nonInteractivePermissions, timeoutMs, diff --git a/src/session-conversation-model.ts b/src/session-conversation-model.ts index 1a6d7a19..d43aaf89 100644 --- a/src/session-conversation-model.ts +++ b/src/session-conversation-model.ts @@ -7,8 +7,10 @@ import type { ToolCallUpdate, UsageUpdate, } from "@agentclientprotocol/sdk"; +import { textPrompt } from "./prompt-content.js"; import type { ClientOperation, + PromptInput, SessionAcpxState, SessionConversation, SessionAgentContent, @@ -489,18 +491,28 @@ export function appendLegacyHistory( export function recordPromptSubmission( conversation: SessionConversation, - prompt: string, + prompt: PromptInput | string, timestamp = isoNow(), ): void { - const text = prompt.trim(); - if (!text) { + const normalizedPrompt = typeof prompt === "string" ? textPrompt(prompt) : prompt; + const userContent = normalizedPrompt + .map((content) => contentToUserContent(content)) + .filter((content) => content !== undefined); + if (userContent.length === 0) { return; } conversation.messages.push({ User: { id: nextUserMessageId(), - content: [{ Text: trimRuntimeText(text, MAX_RUNTIME_AGENT_TEXT_CHARS) }], + content: userContent.map((content) => { + if ("Text" in content) { + return { + Text: trimRuntimeText(content.Text, MAX_RUNTIME_AGENT_TEXT_CHARS), + }; + } + return content; + }), }, }); updateConversationTimestamp(conversation, timestamp); diff --git a/src/session-runtime.ts b/src/session-runtime.ts index ba7b76ca..c6ffb140 100644 --- a/src/session-runtime.ts +++ b/src/session-runtime.ts @@ -52,6 +52,7 @@ import { type QueueOwnerRuntimeOptions, } from "./session-runtime/queue-owner-process.js"; export type { QueueOwnerRuntimeOptions } from "./session-runtime/queue-owner-process.js"; +import { promptToDisplayText, textPrompt } from "./prompt-content.js"; import { DEFAULT_HISTORY_LIMIT, absolutePath, @@ -77,6 +78,7 @@ import { type OutputErrorOrigin, type OutputFormatter, type PermissionMode, + type PromptInput, type RunPromptResult, type SessionEnsureResult, type SessionRecord, @@ -104,7 +106,7 @@ export type SessionAgentOptions = { export type RunOnceOptions = { agentCommand: string; cwd: string; - message: string; + prompt: PromptInput; mcpServers?: McpServer[]; permissionMode: PermissionMode; nonInteractivePermissions?: NonInteractivePermissionPolicy; @@ -132,7 +134,7 @@ export type SessionCreateOptions = { export type SessionSendOptions = { sessionId: string; - message: string; + prompt: PromptInput; mcpServers?: McpServer[]; permissionMode: PermissionMode; nonInteractivePermissions?: NonInteractivePermissionPolicy; @@ -207,7 +209,7 @@ function toPromptResult( type RunSessionPromptOptions = { sessionRecordId: string; - message: string; + prompt: PromptInput; mcpServers?: McpServer[]; permissionMode: PermissionMode; nonInteractivePermissions?: NonInteractivePermissionPolicy; @@ -322,8 +324,8 @@ async function runQueuedTask( try { const result = await runSessionPrompt({ sessionRecordId, - message: task.message, mcpServers: options.mcpServers, + prompt: task.prompt ?? textPrompt(task.message), permissionMode: task.permissionMode, nonInteractivePermissions: task.nonInteractivePermissions ?? options.nonInteractivePermissions, @@ -383,7 +385,7 @@ async function runSessionPrompt(options: RunSessionPromptOptions): Promise }); const response = await measurePerf("runtime.exec.prompt", async () => { - return await withTimeout(client.prompt(sessionId, options.message), options.timeoutMs); + return await withTimeout(client.prompt(sessionId, options.prompt), options.timeoutMs); }); output.flush(); return toPromptResult(response.stopReason, sessionId, client); @@ -809,7 +811,8 @@ async function submitToRunningOwner( ): Promise { return await trySubmitToRunningOwner({ sessionId: options.sessionId, - message: options.message, + message: promptToDisplayText(options.prompt), + prompt: options.prompt, permissionMode: options.permissionMode, nonInteractivePermissions: options.nonInteractivePermissions, outputFormatter: options.outputFormatter, diff --git a/src/types.ts b/src/types.ts index 665b7c8e..415a18dc 100644 --- a/src/types.ts +++ b/src/types.ts @@ -8,6 +8,7 @@ import type { StopReason, } from "@agentclientprotocol/sdk"; export type { McpServer } from "@agentclientprotocol/sdk"; +import type { PromptInput } from "./prompt-content.js"; export const EXIT_CODES = { SUCCESS: 0, @@ -351,3 +352,4 @@ export type SessionEnqueueResult = { }; export type SessionSendOutcome = SessionSendResult | SessionEnqueueResult; +export type { PromptInput }; diff --git a/test/cli.test.ts b/test/cli.test.ts index 293dda0c..618d0725 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -1427,6 +1427,62 @@ test("prompt supports --file - with additional argument text", async () => { }); }); +test("exec accepts structured ACP prompt blocks from stdin", async () => { + await withTempHome(async (homeDir) => { + const cwd = path.join(homeDir, "workspace"); + await fs.mkdir(cwd, { recursive: true }); + + const result = await runCli( + ["--agent", MOCK_AGENT_COMMAND, "--cwd", cwd, "--format", "quiet", "exec"], + homeDir, + { + stdin: JSON.stringify([ + { type: "text", text: "inspect-prompt" }, + { type: "image", mimeType: "image/png", data: "aW1hZ2U=" }, + ]), + }, + ); + + assert.equal(result.code, 0, result.stderr); + const payload = JSON.parse(result.stdout.trim()) as Array>; + assert.deepEqual(payload, [ + { type: "text", text: "inspect-prompt" }, + { type: "image", mimeType: "image/png", bytes: 8 }, + ]); + }); +}); + +test("prompt preserves structured ACP prompt blocks through the queue owner", async () => { + await withTempHome(async (homeDir) => { + const cwd = path.join(homeDir, "workspace"); + await fs.mkdir(cwd, { recursive: true }); + + const created = await runCli( + ["--agent", MOCK_AGENT_COMMAND, "--cwd", cwd, "sessions", "new"], + homeDir, + ); + assert.equal(created.code, 0, created.stderr); + + const result = await runCli( + ["--agent", MOCK_AGENT_COMMAND, "--cwd", cwd, "--format", "quiet", "prompt"], + homeDir, + { + stdin: JSON.stringify([ + { type: "text", text: "inspect-prompt" }, + { type: "image", mimeType: "image/png", data: "aW1hZ2U=" }, + ]), + }, + ); + + assert.equal(result.code, 0, result.stderr); + const payload = JSON.parse(result.stdout.trim()) as Array>; + assert.deepEqual(payload, [ + { type: "text", text: "inspect-prompt" }, + { type: "image", mimeType: "image/png", bytes: 8 }, + ]); + }); +}); + test("prompt subcommand accepts --file without being consumed by parent command", async () => { await withTempHome(async (homeDir) => { const cwd = path.join(homeDir, "workspace"); diff --git a/test/mock-agent.ts b/test/mock-agent.ts index 3b4d3166..5e21b5e3 100644 --- a/test/mock-agent.ts +++ b/test/mock-agent.ts @@ -97,6 +97,27 @@ function getPromptText(prompt: ContentBlock[]): string { return parts.join("").trim(); } +function describePromptBlocks(prompt: ContentBlock[]): string { + return JSON.stringify( + prompt.map((block) => { + switch (block.type) { + case "text": + return { type: "text", text: block.text }; + case "image": + return { type: "image", mimeType: block.mimeType, bytes: block.data.length }; + case "resource_link": + return { type: "resource_link", uri: block.uri }; + case "resource": + return { + type: "resource", + uri: block.resource.uri, + hasText: "text" in block.resource && typeof block.resource.text === "string", + }; + } + }), + ); +} + function splitCommandLine(value: string): ParsedCommand { const parts: string[] = []; let current = ""; @@ -487,7 +508,10 @@ class MockAgent implements Agent { try { const text = getPromptText(params.prompt); - const response = await this.handlePrompt(params.sessionId, text, promptAbort.signal); + const response = + text === "inspect-prompt" + ? describePromptBlocks(params.prompt) + : await this.handlePrompt(params.sessionId, text, promptAbort.signal); session.hasCompletedPrompt = true; await this.sendAssistantMessage(params.sessionId, response); return { stopReason: "end_turn" }; diff --git a/test/queue-messages.test.ts b/test/queue-messages.test.ts index fcec500b..467615d7 100644 --- a/test/queue-messages.test.ts +++ b/test/queue-messages.test.ts @@ -19,6 +19,7 @@ test("parseQueueRequest accepts submit_prompt with nonInteractivePermissions", ( requestId: "req-1", ownerGeneration: 123, message: "hello", + prompt: [{ type: "text", text: "hello" }], permissionMode: "approve-reads", nonInteractivePermissions: "fail", timeoutMs: 1_500,