Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion codex-rs/core/src/codex.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ impl Codex {
&& let Err(err) = resolve_compatible_node(config.js_repl_node_path.as_deref()).await
{
let message = format!(
"Disabled `code_mode` for this session because the configured Node runtime is unavailable or incompatible. {err}"
"Disabled `exec` for this session because the configured Node runtime is unavailable or incompatible. {err}"
);
warn!("{message}");
let _ = config.features.disable(Feature::CodeMode);
Expand Down
71 changes: 41 additions & 30 deletions codex-rs/core/src/tools/code_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use tokio::io::BufReader;

const CODE_MODE_RUNNER_SOURCE: &str = include_str!("code_mode_runner.cjs");
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("code_mode_bridge.js");
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";

#[derive(Clone)]
struct ExecContext {
Expand Down Expand Up @@ -89,15 +90,23 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
return None;
}

let mut section = String::from("## Code Mode\n");
section.push_str(
"- Use `code_mode` for JavaScript execution in a Node-backed `node:vm` context.\n",
);
section.push_str("- `code_mode` is a freeform/custom tool. Direct `code_mode` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n");
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
let mut section = String::from("## Exec\n");
section.push_str(&format!(
"- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
));
section.push_str(&format!(
"- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
));
section.push_str(&format!(
"- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
));
section.push_str(&format!(
"- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
));
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import `{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str(&format!(
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n",
));
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
Expand Down Expand Up @@ -149,19 +158,19 @@ async fn execute_node(

let mut child = cmd
.spawn()
.map_err(|err| format!("failed to start code_mode Node runtime: {err}"))?;
.map_err(|err| format!("failed to start {PUBLIC_TOOL_NAME} Node runtime: {err}"))?;
let stdout = child
.stdout
.take()
.ok_or_else(|| "code_mode runner missing stdout".to_string())?;
.ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stdout"))?;
let stderr = child
.stderr
.take()
.ok_or_else(|| "code_mode runner missing stderr".to_string())?;
.ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stderr"))?;
let mut stdin = child
.stdin
.take()
.ok_or_else(|| "code_mode runner missing stdin".to_string())?;
.ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stdin"))?;

let stderr_task = tokio::spawn(async move {
let mut reader = BufReader::new(stderr);
Expand All @@ -185,13 +194,14 @@ async fn execute_node(
while let Some(line) = stdout_lines
.next_line()
.await
.map_err(|err| format!("failed to read code_mode runner stdout: {err}"))?
.map_err(|err| format!("failed to read {PUBLIC_TOOL_NAME} runner stdout: {err}"))?
{
if line.trim().is_empty() {
continue;
}
let message: NodeToHostMessage = serde_json::from_str(&line)
.map_err(|err| format!("invalid code_mode runner message: {err}; line={line}"))?;
let message: NodeToHostMessage = serde_json::from_str(&line).map_err(|err| {
format!("invalid {PUBLIC_TOOL_NAME} runner message: {err}; line={line}")
})?;
match message {
NodeToHostMessage::ToolCall { id, name, input } => {
let response = HostToNodeMessage::Response {
Expand Down Expand Up @@ -224,20 +234,20 @@ async fn execute_node(
let status = child
.wait()
.await
.map_err(|err| format!("failed to wait for code_mode runner: {err}"))?;
.map_err(|err| format!("failed to wait for {PUBLIC_TOOL_NAME} runner: {err}"))?;
let stderr = stderr_task
.await
.map_err(|err| format!("failed to collect code_mode stderr: {err}"))?;
.map_err(|err| format!("failed to collect {PUBLIC_TOOL_NAME} stderr: {err}"))?;

match final_content_items {
Some(content_items) if status.success() => Ok(content_items),
Some(_) => Err(format_runner_failure(
"code_mode execution failed",
&format!("{PUBLIC_TOOL_NAME} execution failed"),
status,
&stderr,
)),
None => Err(format_runner_failure(
"code_mode runner exited without returning a result",
&format!("{PUBLIC_TOOL_NAME} runner exited without returning a result"),
status,
&stderr,
)),
Expand All @@ -249,19 +259,19 @@ async fn write_message(
message: &HostToNodeMessage,
) -> Result<(), String> {
let line = serde_json::to_string(message)
.map_err(|err| format!("failed to serialize code_mode message: {err}"))?;
.map_err(|err| format!("failed to serialize {PUBLIC_TOOL_NAME} message: {err}"))?;
stdin
.write_all(line.as_bytes())
.await
.map_err(|err| format!("failed to write code_mode message: {err}"))?;
.map_err(|err| format!("failed to write {PUBLIC_TOOL_NAME} message: {err}"))?;
stdin
.write_all(b"\n")
.await
.map_err(|err| format!("failed to write code_mode message newline: {err}"))?;
.map_err(|err| format!("failed to write {PUBLIC_TOOL_NAME} message newline: {err}"))?;
stdin
.flush()
.await
.map_err(|err| format!("failed to flush code_mode message: {err}"))
.map_err(|err| format!("failed to flush {PUBLIC_TOOL_NAME} message: {err}"))
}

fn append_stderr(message: String, stderr: &str) -> String {
Expand Down Expand Up @@ -336,7 +346,7 @@ async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
let mut out = Vec::new();
for spec in router.specs() {
let tool_name = spec.name().to_string();
if tool_name == "code_mode" {
if tool_name == PUBLIC_TOOL_NAME {
continue;
}

Expand Down Expand Up @@ -385,8 +395,8 @@ async fn call_nested_tool(
tool_name: String,
input: Option<JsonValue>,
) -> JsonValue {
if tool_name == "code_mode" {
return JsonValue::String("code_mode cannot invoke itself".to_string());
if tool_name == PUBLIC_TOOL_NAME {
return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself"));
}

let router = build_nested_router(&exec).await;
Expand All @@ -410,7 +420,7 @@ async fn call_nested_tool(

let call = ToolCall {
tool_name: tool_name.clone(),
call_id: format!("code_mode-{}", uuid::Uuid::new_v4()),
call_id: format!("{PUBLIC_TOOL_NAME}-{}", uuid::Uuid::new_v4()),
payload,
};
let result = router
Expand Down Expand Up @@ -442,7 +452,7 @@ fn tool_kind_for_name(specs: &[ToolSpec], tool_name: &str) -> Result<CodeModeToo
.iter()
.find(|spec| spec.name() == tool_name)
.map(tool_kind_for_spec)
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in code_mode"))
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in {PUBLIC_TOOL_NAME}"))
}

fn build_nested_tool_payload(
Expand Down Expand Up @@ -496,8 +506,9 @@ fn output_content_items_from_json_values(
.into_iter()
.enumerate()
.map(|(index, item)| {
serde_json::from_value(item)
.map_err(|err| format!("invalid code_mode content item at index {index}: {err}"))
serde_json::from_value(item).map_err(|err| {
format!("invalid {PUBLIC_TOOL_NAME} content item at index {index}: {err}")
})
})
.collect()
}
6 changes: 3 additions & 3 deletions codex-rs/core/src/tools/code_mode_runner.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -317,14 +317,14 @@ function createModuleResolver(context, callTool, enabledTools, state) {
}
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
if (!namespacedMatch) {
throw new Error(`Unsupported import in code_mode: ${specifier}`);
throw new Error(`Unsupported import in exec: ${specifier}`);
}

const namespace = namespacedMatch[1]
.split('/')
.filter((segment) => segment.length > 0);
if (namespace.length === 0) {
throw new Error(`Unsupported import in code_mode: ${specifier}`);
throw new Error(`Unsupported import in exec: ${specifier}`);
}

const cacheKey = namespace.join('/');
Expand All @@ -347,7 +347,7 @@ async function runModule(context, request, state, callTool) {
);
const mainModule = new SourceTextModule(request.source, {
context,
identifier: 'code_mode_main.mjs',
identifier: 'exec_main.mjs',
importModuleDynamically: async (specifier) => resolveModule(specifier),
});

Expand Down
13 changes: 7 additions & 6 deletions codex-rs/core/src/tools/handlers/code_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use async_trait::async_trait;
use crate::features::Feature;
use crate::function_tool::FunctionCallError;
use crate::tools::code_mode;
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
Expand Down Expand Up @@ -33,17 +34,17 @@ impl ToolHandler for CodeModeHandler {
} = invocation;

if !session.features().enabled(Feature::CodeMode) {
return Err(FunctionCallError::RespondToModel(
"code_mode is disabled by feature flag".to_string(),
));
return Err(FunctionCallError::RespondToModel(format!(
"{PUBLIC_TOOL_NAME} is disabled by feature flag"
)));
}

let code = match payload {
ToolPayload::Custom { input } => input,
_ => {
return Err(FunctionCallError::RespondToModel(
"code_mode expects raw JavaScript source text".to_string(),
));
return Err(FunctionCallError::RespondToModel(format!(
"{PUBLIC_TOOL_NAME} expects raw JavaScript source text"
)));
}
};

Expand Down
9 changes: 5 additions & 4 deletions codex-rs/core/src/tools/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::features::Feature;
use crate::features::Features;
use crate::mcp_connection_manager::ToolInfo;
use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
use crate::tools::handlers::PLAN_TOOL;
use crate::tools::handlers::SEARCH_TOOL_BM25_DEFAULT_LIMIT;
use crate::tools::handlers::SEARCH_TOOL_BM25_TOOL_NAME;
Expand Down Expand Up @@ -1620,11 +1621,11 @@ source: /[\s\S]+/
enabled_tool_names.join(", ")
};
let description = format!(
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
);

ToolSpec::Freeform(FreeformTool {
name: "code_mode".to_string(),
name: PUBLIC_TOOL_NAME.to_string(),
description,
format: FreeformToolFormat {
r#type: "grammar".to_string(),
Expand Down Expand Up @@ -2026,12 +2027,12 @@ pub(crate) fn build_specs(
let mut enabled_tool_names = nested_specs
.into_iter()
.map(|spec| spec.spec.name().to_string())
.filter(|name| name != "code_mode")
.filter(|name| name != PUBLIC_TOOL_NAME)
.collect::<Vec<_>>();
enabled_tool_names.sort();
enabled_tool_names.dedup();
builder.push_spec(create_code_mode_tool(&enabled_tool_names));
builder.register_handler("code_mode", code_mode_handler);
builder.register_handler(PUBLIC_TOOL_NAME, code_mode_handler);
}

match &config.shell_type {
Expand Down
Loading
Loading