Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 41 additions & 2 deletions codex-rs/core/src/tools/code_mode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ use crate::tools::context::ToolPayload;
use crate::tools::js_repl::resolve_compatible_node;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text_content_items_with_policy;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::models::FunctionCallOutputContentItem;
use serde::Deserialize;
use serde::Serialize;
Expand Down Expand Up @@ -72,6 +76,8 @@ enum NodeToHostMessage {
},
Result {
content_items: Vec<JsonValue>,
#[serde(default)]
max_output_tokens_per_exec_call: Option<usize>,
},
}

Expand All @@ -88,6 +94,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
Expand Down Expand Up @@ -187,8 +194,14 @@ async fn execute_node(
};
write_message(&mut stdin, &response).await?;
}
NodeToHostMessage::Result { content_items } => {
final_content_items = Some(output_content_items_from_json_values(content_items)?);
NodeToHostMessage::Result {
content_items,
max_output_tokens_per_exec_call,
} => {
final_content_items = Some(truncate_code_mode_result(
output_content_items_from_json_values(content_items)?,
max_output_tokens_per_exec_call,
));
break;
}
}
Expand Down Expand Up @@ -261,6 +274,32 @@ fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String
.replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
}

/// Applies the per-execution token budget to the final `code_mode` result.
///
/// The budget is obtained by passing `max_output_tokens_per_exec_call` through
/// `resolve_max_tokens` and is enforced as a `TruncationPolicy::Tokens` policy.
///
/// * If any item is not `InputText`, the items go through
///   `truncate_function_output_items_with_policy` unchanged in shape.
/// * If every item is `InputText`, the items go through
///   `formatted_truncate_text_content_items_with_policy`. When that helper
///   reports an original token count, the first text item is prefixed with
///   an `Original token count: N` / `Output:` header.
fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens_per_exec_call: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
    let policy = TruncationPolicy::Tokens(resolve_max_tokens(max_output_tokens_per_exec_call));

    let text_only = items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }));
    if !text_only {
        // Mixed content: no header is added on this path.
        return truncate_function_output_items_with_policy(&items, policy);
    }

    let (mut truncated_items, original_token_count) =
        formatted_truncate_text_content_items_with_policy(&items, policy);

    // NOTE(review): the count is presumably only `Some` when truncation actually
    // happened; confirm against the helper in `crate::truncate`.
    if let (Some(token_count), Some(FunctionCallOutputContentItem::InputText { text })) =
        (original_token_count, truncated_items.first_mut())
    {
        *text = format!("Original token count: {token_count}\nOutput:\n{text}");
    }

    truncated_items
}

async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
let router = build_nested_router(exec).await;
let mcp_tool_names = exec
Expand Down
97 changes: 62 additions & 35 deletions codex-rs/core/src/tools/code_mode_runner.cjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ const readline = require('node:readline');
const vm = require('node:vm');

const { SourceTextModule, SyntheticModule } = vm;
const DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL = 10000;

/**
 * Validates a caller-supplied token budget.
 *
 * @param {unknown} value Candidate budget.
 * @returns {number} `value` unchanged when it is a safe integer >= 0.
 * @throws {TypeError} When `value` is not a safe integer or is negative.
 */
function normalizeMaxOutputTokensPerExecCall(value) {
  const isValidBudget = Number.isSafeInteger(value) && value >= 0;
  if (isValidBudget) {
    return value;
  }
  throw new TypeError('max_output_tokens_per_exec_call must be a non-negative safe integer');
}

function createProtocol() {
const rl = readline.createInterface({
Expand Down Expand Up @@ -100,17 +108,20 @@ function isValidIdentifier(name) {
return /^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name);
}

function createToolsNamespace(protocol, enabledTools) {
/**
 * Builds the function used to issue nested tool calls over the host protocol.
 *
 * @param {{ request: Function }} protocol Object whose `request(type, payload)`
 *   method is invoked for every call.
 * @returns {(name: unknown, input: unknown) => unknown} A function that sends a
 *   `'tool_call'` request carrying the stringified tool name and the raw
 *   input, and returns whatever `protocol.request` returns.
 */
function createToolCaller(protocol) {
  const callTool = (name, input) => {
    return protocol.request('tool_call', { name: String(name), input });
  };
  return callTool;
}

function createToolsNamespace(callTool, enabledTools) {
const tools = Object.create(null);

for (const { tool_name } of enabledTools) {
const callTool = async (args) =>
protocol.request('tool_call', {
name: String(tool_name),
input: args,
});
Object.defineProperty(tools, tool_name, {
value: callTool,
value: async (args) => callTool(tool_name, args),
configurable: false,
enumerable: true,
writable: false,
Expand All @@ -120,8 +131,8 @@ function createToolsNamespace(protocol, enabledTools) {
return Object.freeze(tools);
}

function createToolsModule(context, protocol, enabledTools) {
const tools = createToolsNamespace(protocol, enabledTools);
function createToolsModule(context, callTool, enabledTools) {
const tools = createToolsNamespace(callTool, enabledTools);
const exportNames = ['tools'];

for (const { tool_name } of enabledTools) {
Expand Down Expand Up @@ -153,7 +164,7 @@ function namespacesMatch(left, right) {
return left.every((segment, index) => segment === right[index]);
}

function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
function createNamespacedToolsNamespace(callTool, enabledTools, namespace) {
const tools = Object.create(null);

for (const tool of enabledTools) {
Expand All @@ -162,13 +173,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
continue;
}

const callTool = async (args) =>
protocol.request('tool_call', {
name: String(tool.tool_name),
input: args,
});
Object.defineProperty(tools, tool.name, {
value: callTool,
value: async (args) => callTool(tool.tool_name, args),
configurable: false,
enumerable: true,
writable: false,
Expand All @@ -178,8 +184,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
return Object.freeze(tools);
}

function createNamespacedToolsModule(context, protocol, enabledTools, namespace) {
const tools = createNamespacedToolsNamespace(protocol, enabledTools, namespace);
function createNamespacedToolsModule(context, callTool, enabledTools, namespace) {
const tools = createNamespacedToolsNamespace(callTool, enabledTools, namespace);
const exportNames = ['tools'];

for (const exportName of Object.keys(tools)) {
Expand All @@ -204,14 +210,32 @@ function createNamespacedToolsModule(context, protocol, enabledTools, namespace)
);
}

function createModuleResolver(context, protocol, enabledTools) {
const toolsModule = createToolsModule(context, protocol, enabledTools);
/**
 * Creates the synthetic module that exposes
 * `set_max_output_tokens_per_exec_call`.
 *
 * The exported setter validates its argument with
 * `normalizeMaxOutputTokensPerExecCall`, stores the accepted value on
 * `state.maxOutputTokensPerExecCall`, and returns it. Invalid values make the
 * validator throw, leaving `state` untouched.
 *
 * @param {object} context vm context the module is bound to.
 * @param {{ maxOutputTokensPerExecCall: number }} state Mutable run state.
 * @returns {SyntheticModule} Module with a single named export.
 */
function createCodeModeModule(context, state) {
  const exportName = 'set_max_output_tokens_per_exec_call';

  const setMaxOutputTokensPerExecCall = (value) => {
    const normalized = normalizeMaxOutputTokensPerExecCall(value);
    state.maxOutputTokensPerExecCall = normalized;
    return normalized;
  };

  return new SyntheticModule(
    [exportName],
    // Must stay a regular function: `this` is the module being evaluated.
    function initCodeModeModule() {
      this.setExport(exportName, setMaxOutputTokensPerExecCall);
    },
    { context }
  );
}

function createModuleResolver(context, callTool, enabledTools, state) {
const toolsModule = createToolsModule(context, callTool, enabledTools);
const codeModeModule = createCodeModeModule(context, state);
const namespacedModules = new Map();

return function resolveModule(specifier) {
if (specifier === 'tools.js') {
return toolsModule;
}
if (specifier === '@openai/code_mode') {
return codeModeModule;
}

const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
if (!namespacedMatch) {
Expand All @@ -229,52 +253,55 @@ function createModuleResolver(context, protocol, enabledTools) {
if (!namespacedModules.has(cacheKey)) {
namespacedModules.set(
cacheKey,
createNamespacedToolsModule(context, protocol, enabledTools, namespace)
createNamespacedToolsModule(context, callTool, enabledTools, namespace)
);
}
return namespacedModules.get(cacheKey);
};
}

async function runModule(context, protocol, request) {
const resolveModule = createModuleResolver(context, protocol, request.enabled_tools ?? []);
async function runModule(context, protocol, request, state, callTool) {
const resolveModule = createModuleResolver(
context,
callTool,
request.enabled_tools ?? [],
state
);
const mainModule = new SourceTextModule(request.source, {
context,
identifier: 'code_mode_main.mjs',
importModuleDynamically(specifier) {
return resolveModule(specifier);
},
importModuleDynamically: async (specifier) => resolveModule(specifier),
});

await mainModule.link(async (specifier) => {
return resolveModule(specifier);
});
await mainModule.link(resolveModule);
await mainModule.evaluate();
}

async function main() {
const protocol = createProtocol();
const request = await protocol.init;
const state = {
maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
};
const callTool = createToolCaller(protocol);
const context = vm.createContext({
__codex_tool_call: async (name, input) =>
protocol.request('tool_call', {
name: String(name),
input,
}),
__codex_tool_call: callTool,
});

try {
await runModule(context, protocol, request);
await runModule(context, protocol, request, state, callTool);
await protocol.send({
type: 'result',
content_items: readContentItems(context),
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(0);
} catch (error) {
process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`);
await protocol.send({
type: 'result',
content_items: readContentItems(context),
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(1);
}
Expand Down
2 changes: 1 addition & 1 deletion codex-rs/core/src/tools/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1621,7 +1621,7 @@ source: /[\s\S]+/
enabled_tool_names.join(", ")
};
let description = format!(
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
);

ToolSpec::Freeform(FreeformTool {
Expand Down
Loading
Loading