Skip to content

Commit b93403a

Browse files
refactor(context_engine): remove retry logic and simplify error handling (#2407)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent e9af89f commit b93403a

29 files changed

Lines changed: 569 additions & 2169 deletions

File tree

Cargo.lock

Lines changed: 23 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ strip = true
1717
[workspace.dependencies]
1818
anyhow = "1.0.101"
1919
async-recursion = "1.1.1"
20+
async-stream = "0.3"
2021
async-trait = "0.1.89"
2122
aws-config = { version = "1.8.13", features = ["behavior-version-latest"], default-features = false }
2223
aws-sdk-bedrockruntime = { version = "1.124.0", features = ["behavior-version-latest"], default-features = false }

crates/forge_app/src/infra.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,21 @@ pub trait FileReaderInfra: Send + Sync {
4545
/// Returns the file content as a UTF-8 string.
4646
async fn read_utf8(&self, path: &Path) -> anyhow::Result<String>;
4747

48+
/// Reads multiple files in batches and returns a stream of file batches.
49+
///
50+
/// # Arguments
51+
/// * `batch_size` - Number of files to include in each batch
52+
/// * `paths` - Vector of file paths to read
53+
///
54+
/// Returns a stream where each item is a vector of tuples containing
55+
/// (file_path, file_content). Files within each batch are read concurrently
56+
/// for better performance.
57+
fn read_batch_utf8(
58+
&self,
59+
batch_size: usize,
60+
paths: Vec<PathBuf>,
61+
) -> impl futures::Stream<Item = anyhow::Result<Vec<(PathBuf, String)>>> + Send;
62+
4863
/// Reads the content of a file at the specified path.
4964
/// Returns the file content as raw bytes.
5065
async fn read(&self, path: &Path) -> anyhow::Result<Vec<u8>>;

crates/forge_app/src/orch_spec/orch_setup.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ impl Default for TestContext {
7575
stdout_max_prefix_length: 256,
7676
stdout_max_suffix_length: 256,
7777
max_read_size: 4096,
78+
max_file_read_batch_size: 50,
7879
http: HttpConfig::default(),
7980
max_file_size: 1024 * 1024 * 5,
8081
max_search_result_bytes: 200,

crates/forge_app/src/workspace_status.rs

Lines changed: 18 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,25 @@
11
use std::collections::{BTreeSet, HashMap};
22

3-
use forge_domain::{FileHash, FileNode, FileStatus, SyncProgress, SyncStatus};
3+
use forge_domain::{FileHash, FileStatus, SyncProgress, SyncStatus};
44

55
/// Result of comparing local and server files
66
///
7-
/// This struct stores local and remote file information and provides methods
7+
/// This struct stores remote file information and provides methods
88
/// to compute synchronization operations on-demand. It can derive file statuses
99
/// and identify which files need to be uploaded, deleted, or modified.
1010
pub struct WorkspaceStatus {
11-
/// Local files with their content and hashes
12-
local_files: Vec<FileNode>,
1311
/// Remote file hashes from the server
1412
remote_files: Vec<FileHash>,
1513
}
1614

1715
impl WorkspaceStatus {
18-
/// Creates a sync plan from local files and remote file hashes.
16+
/// Creates a sync plan from remote file hashes.
1917
///
2018
/// # Arguments
2119
///
22-
/// * `local_files` - Vector of local files with their content and hashes
2320
/// * `remote_files` - Vector of remote file hashes from the server
24-
pub fn new(local_files: Vec<FileNode>, remote_files: Vec<FileHash>) -> Self {
25-
Self { local_files, remote_files }
21+
pub fn new(remote_files: Vec<FileHash>) -> Self {
22+
Self { remote_files }
2623
}
2724

2825
/// Derives file sync statuses by comparing local and remote files.
@@ -34,12 +31,11 @@ impl WorkspaceStatus {
3431
/// - `Modified`: File exists in both but with different hashes
3532
/// - `New`: File exists only locally
3633
/// - `Deleted`: File exists only remotely
37-
pub fn file_statuses(&self) -> Vec<FileStatus> {
34+
pub fn file_statuses(&self, local_files: Vec<FileHash>) -> Vec<FileStatus> {
3835
// Build hash maps for efficient lookup
39-
let local_hashes: HashMap<&str, &str> = self
40-
.local_files
36+
let local_hashes: HashMap<&str, &str> = local_files
4137
.iter()
42-
.map(|f| (f.file_path.as_str(), f.hash.as_str()))
38+
.map(|f| (f.path.as_str(), f.hash.as_str()))
4339
.collect();
4440
let remote_hashes: HashMap<&str, &str> = self
4541
.remote_files
@@ -78,38 +74,16 @@ impl WorkspaceStatus {
7874
///
7975
/// A tuple of (files_to_delete, files_to_upload) where:
8076
/// - `files_to_delete`: Vector of file paths to delete from remote
81-
/// - `files_to_upload`: Vector of files to upload to remote
82-
pub fn get_operations(&self) -> (Vec<String>, Vec<forge_domain::FileRead>) {
83-
let statuses = self.file_statuses();
77+
/// - `files_to_upload`: Vector of file paths to upload to remote
78+
pub fn get_operations(&self, local_files: Vec<FileHash>) -> (Vec<String>, Vec<String>) {
79+
let statuses = self.file_statuses(local_files);
8480
let mut files_to_delete = Vec::new();
8581
let mut files_to_upload = Vec::new();
8682

87-
// Create a map for quick lookup of local files
88-
let local_files_map: HashMap<&str, &FileNode> = self
89-
.local_files
90-
.iter()
91-
.map(|f| (f.file_path.as_str(), f))
92-
.collect();
93-
9483
for status in statuses {
9584
match status.status {
96-
SyncStatus::Modified => {
97-
// Note: Modified files already exist in the database and will be
98-
// automatically deleted by the backend.
99-
if let Some(file) = local_files_map.get(status.path.as_str()) {
100-
files_to_upload.push(forge_domain::FileRead::new(
101-
file.file_path.clone(),
102-
file.content.clone(),
103-
));
104-
}
105-
}
106-
SyncStatus::New => {
107-
if let Some(file) = local_files_map.get(status.path.as_str()) {
108-
files_to_upload.push(forge_domain::FileRead::new(
109-
file.file_path.clone(),
110-
file.content.clone(),
111-
));
112-
}
85+
SyncStatus::Modified | SyncStatus::New => {
86+
files_to_upload.push(status.path);
11387
}
11488
SyncStatus::Deleted => {
11589
files_to_delete.push(status.path);
@@ -123,7 +97,6 @@ impl WorkspaceStatus {
12397
(files_to_delete, files_to_upload)
12498
}
12599
}
126-
127100
/// Tracks progress of sync operations
128101
pub struct SyncProgressCounter {
129102
total_files: usize,
@@ -162,30 +135,18 @@ mod tests {
162135
#[test]
163136
fn test_file_statuses() {
164137
let local = vec![
165-
FileNode {
166-
file_path: "a.rs".into(),
167-
content: "content_a".into(),
168-
hash: "hash_a".into(),
169-
},
170-
FileNode {
171-
file_path: "b.rs".into(),
172-
content: "modified_content".into(),
173-
hash: "new_hash".into(),
174-
},
175-
FileNode {
176-
file_path: "d.rs".into(),
177-
content: "content_d".into(),
178-
hash: "hash_d".into(),
179-
},
138+
FileHash { path: "a.rs".into(), hash: "hash_a".into() },
139+
FileHash { path: "b.rs".into(), hash: "new_hash".into() },
140+
FileHash { path: "d.rs".into(), hash: "hash_d".into() },
180141
];
181142
let remote = vec![
182143
FileHash { path: "a.rs".into(), hash: "hash_a".into() },
183144
FileHash { path: "b.rs".into(), hash: "old_hash".into() },
184145
FileHash { path: "c.rs".into(), hash: "hash_c".into() },
185146
];
186147

187-
let plan = WorkspaceStatus::new(local, remote);
188-
let actual = plan.file_statuses();
148+
let plan = WorkspaceStatus::new(remote);
149+
let actual = plan.file_statuses(local);
189150

190151
let expected = vec![
191152
forge_domain::FileStatus::new("a.rs".to_string(), forge_domain::SyncStatus::InSync),

crates/forge_domain/src/env.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ pub struct Environment {
5252
pub max_line_length: usize,
5353
/// Maximum number of lines to read from a file
5454
pub max_read_size: u64,
55+
/// Maximum number of files that can be read in a single batch operation.
56+
/// Controlled by FORGE_MAX_READ_BATCH_SIZE environment variable.
57+
pub max_file_read_batch_size: usize,
5558
/// Http configuration
5659
pub http: HttpConfig,
5760
/// Maximum file size in bytes for operations
@@ -289,6 +292,7 @@ fn test_command_path() {
289292
stdout_max_line_length: 500,
290293
max_line_length: 2000,
291294
max_read_size: 2000,
295+
max_file_read_batch_size: 50,
292296
http: HttpConfig::default(),
293297
max_file_size: 104857600,
294298
tool_timeout: 300,
@@ -329,6 +333,7 @@ fn test_command_cwd_path() {
329333
stdout_max_line_length: 500,
330334
max_line_length: 2000,
331335
max_read_size: 2000,
336+
max_file_read_batch_size: 50,
332337
http: HttpConfig::default(),
333338
max_file_size: 104857600,
334339
tool_timeout: 300,
@@ -369,6 +374,7 @@ fn test_command_cwd_path_independent_from_command_path() {
369374
stdout_max_line_length: 500,
370375
max_line_length: 2000,
371376
max_read_size: 2000,
377+
max_file_read_batch_size: 50,
372378
http: HttpConfig::default(),
373379
max_file_size: 104857600,
374380
tool_timeout: 300,

crates/forge_domain/src/error.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ pub enum Error {
9999
#[error("Workspace already initialized with id: {0}")]
100100
WorkspaceAlreadyInitialized(WorkspaceId),
101101

102+
#[error("Failed to sync {count} file(s)")]
103+
SyncFailed { count: usize },
104+
102105
#[error("No default provider set.")]
103106
NoDefaultProvider,
104107

@@ -149,6 +152,10 @@ impl Error {
149152
pub fn no_default_model(provider: ProviderId) -> Self {
150153
Self::NoDefaultModel(provider)
151154
}
155+
156+
pub fn sync_failed(count: usize) -> Self {
157+
Self::SyncFailed { count }
158+
}
152159
}
153160

154161
#[cfg(test)]

crates/forge_domain/src/file.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,12 @@ pub struct FileHash {
4242
pub hash: String,
4343
}
4444

45+
impl From<super::node::FileNode> for FileHash {
46+
fn from(node: super::node::FileNode) -> Self {
47+
Self { path: node.file_path, hash: node.hash }
48+
}
49+
}
50+
4551
/// Status of a file in relation to the server
4652
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, PartialOrd, Ord)]
4753
pub enum SyncStatus {

crates/forge_domain/src/repo.rs

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ use url::Url;
66
use crate::{
77
AnyProvider, AppConfig, AuthCredential, ChatCompletionMessage, Context, Conversation,
88
ConversationId, MigrationResult, Model, ModelId, Provider, ProviderId, ProviderTemplate,
9-
ResultStream, SearchMatch, Skill, Snapshot, UserId, Workspace, WorkspaceAuth, WorkspaceId,
9+
ResultStream, SearchMatch, Skill, Snapshot, WorkspaceAuth, WorkspaceId,
1010
};
1111

1212
/// Repository for managing file snapshots
@@ -116,40 +116,6 @@ pub trait ProviderRepository: Send + Sync {
116116
async fn migrate_env_credentials(&self) -> anyhow::Result<Option<MigrationResult>>;
117117
}
118118

119-
/// Repository for managing workspace metadata in local database
120-
#[async_trait::async_trait]
121-
pub trait WorkspaceRepository: Send + Sync {
122-
/// Save or update a workspace
123-
async fn upsert(
124-
&self,
125-
workspace_id: &WorkspaceId,
126-
user_id: &UserId,
127-
path: &std::path::Path,
128-
) -> anyhow::Result<()>;
129-
130-
/// Find all workspaces for a user
131-
///
132-
/// Returns all workspaces belonging to the specified user.
133-
/// Path matching and selection logic should be handled in the service
134-
/// layer.
135-
///
136-
/// # Arguments
137-
/// * `user_id` - Only return workspaces belonging to this user
138-
///
139-
/// # Returns
140-
/// A vector of all workspaces for the user (may be empty)
141-
///
142-
/// # Errors
143-
/// Returns an error if there's a database error
144-
async fn list(&self) -> anyhow::Result<Vec<Workspace>>;
145-
146-
/// Get user ID from any workspace, or None if no workspaces exist
147-
async fn get_user_id(&self) -> anyhow::Result<Option<UserId>>;
148-
149-
/// Delete workspace from local database
150-
async fn delete(&self, workspace_id: &WorkspaceId) -> anyhow::Result<()>;
151-
}
152-
153119
/// Repository for managing workspace indexing and search operations
154120
#[async_trait::async_trait]
155121
pub trait WorkspaceIndexRepository: Send + Sync {

crates/forge_domain/src/workspace.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ use derive_more::Display;
22
use serde::{Deserialize, Serialize};
33
use uuid::Uuid;
44

5-
use crate::UserId;
6-
75
/// Workspace identifier (UUID) from workspace server.
86
///
97
/// Generated locally and sent to server during CreateWorkspace.
@@ -30,13 +28,3 @@ impl WorkspaceId {
3028
self.0
3129
}
3230
}
33-
34-
/// Domain entity for workspace
35-
#[derive(Debug, Clone)]
36-
pub struct Workspace {
37-
pub workspace_id: WorkspaceId,
38-
pub user_id: UserId,
39-
pub path: std::path::PathBuf,
40-
pub created_at: chrono::DateTime<chrono::Utc>,
41-
pub updated_at: Option<chrono::DateTime<chrono::Utc>>,
42-
}

0 commit comments

Comments
 (0)