Skip to content

Commit 4f1ad6b

Browse files
amitksingh1490, autofix-ci[bot], tusharmath
authored
feat(todo): add todo_write tool for task tracking (#2362)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Tushar <tusharmath@gmail.com>
1 parent eafdac7 commit 4f1ad6b

31 files changed

Lines changed: 1457 additions & 37 deletions

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Evaluation for checking appropriate todo_write tool usage
2+
run:
3+
- git clone --depth=1 --branch main https://github.com/antinomyhq/forge .
4+
- FORGE_OVERRIDE_PROVIDER=open_router FORGE_OVERRIDE_MODEL={{model}} FORGE_DEBUG_REQUESTS='{{dir}}/context.json' forgee -p '{{task}}'
5+
parallelism: 3
6+
timeout: 240
7+
early_exit: true
8+
validations:
9+
- name: "Uses todo_write tool for multi-step tasks"
10+
type: shell
11+
command: |
12+
# Check if todo_write was called at least once
13+
jq -e '[.messages[]?.tool_calls[]? | select(.function?.name == "todo_write")] | length > 0' {{dir}}/context.json
14+
15+
sources:
16+
- value:
17+
- model: "anthropic/claude-sonnet-4.5"
18+
- csv: todo_write_usage_tasks.csv
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
task
2+
"Add a dark mode toggle to the application. Create the toggle component, add state management, implement CSS styles, and update existing components to support theme switching."
3+
"Refactor the authentication system: extract the login logic into a separate service, update all imports, add unit tests, and update the documentation."
4+
"Implement a new feature for exporting user data. Add the export button to the UI, create the export service with CSV and JSON support, add backend API endpoint, and write integration tests."
5+
"Optimize the application performance. Profile the code to identify bottlenecks, implement memoization where needed, add virtualization for long lists, optimize images, and measure the improvement."
6+
"Update the project dependencies. Review the package.json for outdated packages, update them one by one testing after each, update the lock file, run all tests, and document any breaking changes."
7+
"Set up CI/CD pipeline. Configure GitHub Actions workflow, add build steps, set up testing, add deployment to staging, configure production deployment with approval, and document the process."

crates/forge_app/src/compact.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -418,6 +418,61 @@ mod tests {
418418
insta::assert_snapshot!(actual);
419419
}
420420

421+
/// Snapshot test: a summary whose assistant turn contains a `todo_write` tool
/// call is rendered through the template engine.
#[test]
fn test_template_engine_renders_todo_write() {
    use forge_domain::{
        ContextSummary, Role, SummaryBlock, SummaryMessage, SummaryTool, SummaryToolCall, Todo,
        TodoChange, TodoChangeKind, TodoStatus,
    };

    // Create test data with todo_write tool call showing a diff: one updated,
    // one added and one removed todo.
    let updated = TodoChange {
        todo: Todo::new("Implement user authentication")
            .id("1")
            .status(TodoStatus::Completed),
        kind: TodoChangeKind::Updated,
    };
    let added = TodoChange {
        todo: Todo::new("Add database migrations")
            .id("2")
            .status(TodoStatus::InProgress),
        kind: TodoChangeKind::Added,
    };
    let removed = TodoChange {
        todo: Todo::new("Write documentation")
            .id("3")
            .status(TodoStatus::Pending),
        kind: TodoChangeKind::Removed,
    };
    let changes = vec![updated, added, removed];

    // User turn asking for a plan, followed by the assistant's tool call.
    let user_block = SummaryBlock::new(
        Role::User,
        vec![SummaryMessage::content("Create a task plan")],
    );
    let todo_write_call = SummaryToolCall {
        id: Some(forge_domain::ToolCallId::new("call_1")),
        tool: SummaryTool::TodoWrite { changes },
        is_success: true,
    };
    let assistant_block = SummaryBlock::new(Role::Assistant, vec![todo_write_call.into()]);

    let context_summary = ContextSummary { messages: vec![user_block, assistant_block] };
    let data = serde_json::json!({"messages": context_summary.messages});

    let actual = render_template(&data);

    insta::assert_snapshot!(actual);
}
475+
421476
#[tokio::test]
422477
async fn test_render_summary_frame_snapshot() {
423478
// Load the conversation fixture

crates/forge_app/src/fmt/fmt_input.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,12 @@ impl FormatContent for ToolCatalog {
119119
.sub_title(input.name.to_lowercase())
120120
.into(),
121121
),
122+
ToolCatalog::TodoWrite(input) => Some(
123+
TitleFormat::debug("Update Todos")
124+
.sub_title(format!("{} item(s)", input.todos.len()))
125+
.into(),
126+
),
127+
ToolCatalog::TodoRead(_) => Some(TitleFormat::debug("Read Todos").into()),
122128
}
123129
}
124130
}

crates/forge_app/src/fmt/fmt_output.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use forge_display::DiffFormat;
22
use forge_domain::{ChatResponseContent, Environment, TitleFormat};
33

44
use crate::fmt::content::FormatContent;
5+
use crate::fmt::todo_fmt::{format_todos, format_todos_diff};
56
use crate::operation::ToolOperation;
67
use crate::utils::format_display_path;
78

@@ -30,6 +31,12 @@ impl FormatContent for ToolOperation {
3031
));
3132
title.into()
3233
}),
34+
ToolOperation::TodoWrite { before, after } => Some(ChatResponseContent::ToolOutput(
35+
format_todos_diff(before, after),
36+
)),
37+
ToolOperation::TodoRead { output } => {
38+
Some(ChatResponseContent::ToolOutput(format_todos(output)))
39+
}
3340
ToolOperation::FsRead { input: _, output: _ }
3441
| ToolOperation::FsRemove { input: _, output: _ }
3542
| ToolOperation::FsSearch { input: _, output: _ }

crates/forge_app/src/fmt/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
pub mod content;
22
pub mod fmt_input;
33
pub mod fmt_output;
4+
pub mod todo_fmt;
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
source: crates/forge_app/src/fmt/todo_fmt.rs
3+
expression: actual
4+
---
5+
6+
󰄵 Task 1
7+
󰄱 Task 3
8+
󰄱 Task 2
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
use forge_domain::Todo;
2+
3+
/// Controls the styling applied to a rendered todo line.
///
/// The type is a plain two-variant flag, so it derives the usual value-type
/// traits: it can be debug-printed, copied freely and compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TodoLineStyle {
    /// Bold styling used for new or changed todos.
    Bold,
    /// Dim styling used for unchanged todos.
    Dim,
}
10+
11+
/// Renders one todo line with icon and ANSI styling.
12+
///
13+
/// # Arguments
14+
///
15+
/// * `todo` - Todo item to render.
16+
/// * `line_style` - Emphasis style for the line.
17+
fn format_todo_line(todo: &Todo, line_style: TodoLineStyle) -> String {
18+
use console::style;
19+
use forge_domain::TodoStatus;
20+
21+
let checkbox = match todo.status {
22+
TodoStatus::Completed => "󰄵",
23+
TodoStatus::InProgress => "󰄗",
24+
TodoStatus::Pending => "󰄱",
25+
};
26+
27+
let content = match todo.status {
28+
TodoStatus::Completed => style(todo.content.as_str()).strikethrough().to_string(),
29+
_ => todo.content.clone(),
30+
};
31+
32+
let line = format!(" {checkbox} {content}");
33+
let styled = match (&todo.status, line_style) {
34+
(TodoStatus::Pending, TodoLineStyle::Bold) => style(line).white().bold().to_string(),
35+
(TodoStatus::Pending, TodoLineStyle::Dim) => style(line).white().dim().to_string(),
36+
(TodoStatus::InProgress, TodoLineStyle::Bold) => style(line).cyan().bold().to_string(),
37+
(TodoStatus::InProgress, TodoLineStyle::Dim) => style(line).cyan().dim().to_string(),
38+
(TodoStatus::Completed, TodoLineStyle::Bold) => style(line).green().bold().to_string(),
39+
(TodoStatus::Completed, TodoLineStyle::Dim) => style(line).green().dim().to_string(),
40+
};
41+
42+
format!("{styled}\n")
43+
}
44+
45+
/// Formats a todo diff showing all todos in `after` plus removed todos from
46+
/// `before`.
47+
///
48+
/// # Arguments
49+
///
50+
/// * `before` - Previous todo list state.
51+
/// * `after` - New todo list state.
52+
pub(crate) fn format_todos_diff(before: &[Todo], after: &[Todo]) -> String {
53+
use console::style;
54+
55+
let before_map: std::collections::HashMap<&str, &Todo> =
56+
before.iter().map(|todo| (todo.id.as_str(), todo)).collect();
57+
let after_ids: std::collections::HashSet<&str> =
58+
after.iter().map(|todo| todo.id.as_str()).collect();
59+
60+
let mut result = "\n".to_string();
61+
62+
for todo in after {
63+
let previous = before_map.get(todo.id.as_str()).copied();
64+
let is_new = previous.is_none();
65+
let is_changed = previous
66+
.map(|item| item.status != todo.status || item.content != todo.content)
67+
.unwrap_or(false);
68+
69+
let line_style = if is_new || is_changed {
70+
TodoLineStyle::Bold
71+
} else {
72+
TodoLineStyle::Dim
73+
};
74+
75+
result.push_str(&format_todo_line(todo, line_style));
76+
}
77+
78+
for todo in before {
79+
if !after_ids.contains(todo.id.as_str()) {
80+
let content = style(todo.content.as_str()).strikethrough().to_string();
81+
result.push_str(&format!(" {}\n", style(format!("󰄱 {content}")).red()));
82+
}
83+
}
84+
85+
result
86+
}
87+
88+
/// Formats todos as ANSI-styled checklist lines.
89+
///
90+
/// # Arguments
91+
///
92+
/// * `todos` - Todo list to format.
93+
pub(crate) fn format_todos(todos: &[Todo]) -> String {
94+
if todos.is_empty() {
95+
return String::new();
96+
}
97+
98+
let mut result = "\n".to_string();
99+
100+
for todo in todos {
101+
result.push_str(&format_todo_line(todo, TodoLineStyle::Dim));
102+
}
103+
104+
result
105+
}
106+
107+
#[cfg(test)]
108+
mod tests {
109+
use console::strip_ansi_codes;
110+
use forge_domain::{ChatResponseContent, Environment, Todo, TodoStatus};
111+
use insta::assert_snapshot;
112+
113+
use crate::fmt::content::FormatContent;
114+
use crate::operation::ToolOperation;
115+
116+
fn fixture_environment() -> Environment {
117+
use fake::{Fake, Faker};
118+
119+
let max_bytes: f64 = 250.0 * 1024.0;
120+
let fixture: Environment = Faker.fake();
121+
122+
fixture
123+
.max_search_lines(25)
124+
.max_search_result_bytes(max_bytes.ceil() as usize)
125+
.fetch_truncation_limit(55)
126+
.max_read_size(10)
127+
.stdout_max_prefix_length(10)
128+
.stdout_max_suffix_length(10)
129+
.max_line_length(100)
130+
.max_file_size(0)
131+
}
132+
133+
fn fixture_todo(content: &str, id: &str, status: TodoStatus) -> Todo {
134+
Todo::new(content).id(id).status(status)
135+
}
136+
137+
fn fixture_todo_write_output(before: Vec<Todo>, after: Vec<Todo>) -> String {
138+
let setup = ToolOperation::TodoWrite { before, after };
139+
let actual = setup.to_content(&fixture_environment());
140+
141+
if let Some(ChatResponseContent::ToolOutput(output)) = actual {
142+
strip_ansi_codes(output.as_str()).to_string()
143+
} else {
144+
panic!("Expected ToolOutput content")
145+
}
146+
}
147+
148+
#[test]
149+
fn test_todo_write_mixed_changes_snapshot() {
150+
let setup = (
151+
vec![
152+
fixture_todo("Task 1", "1", TodoStatus::Pending),
153+
fixture_todo("Task 2", "2", TodoStatus::InProgress),
154+
],
155+
vec![
156+
fixture_todo("Task 1", "1", TodoStatus::Completed),
157+
fixture_todo("Task 3", "3", TodoStatus::Pending),
158+
],
159+
);
160+
161+
let actual = fixture_todo_write_output(setup.0, setup.1);
162+
assert_snapshot!(actual);
163+
}
164+
}

0 commit comments

Comments
 (0)