Skip to content

Commit 73ae220

Browse files
fix(retry): handle UnexpectedEof and connection closed errors as retryable
Detect two additional connection-closed-by-peer scenarios in the retry logic that were previously not treated as retryable: 1. TLS (rustls): the peer closes the connection without sending a TLS close_notify alert, surfaced as std::io::Error(UnexpectedEof) in the source chain. 2. Plain HTTP / hyper: the peer closes the TCP connection before sending a complete HTTP response, surfaced as hyper's IncompleteMessage error with the message 'connection closed before message completed'. Both cases are safe to retry because the request was never acknowledged by the server. This fixes the error observed against the ChatGPT codex endpoint (POST https://chatgpt.com/backend-api/codex/responses): 'peer closed connection without sending TLS close_notify'. Adds an is_incomplete_message() helper that walks the reqwest::Error source chain to detect either condition, and an async test that simulates a peer closing the TCP connection to verify the detection and retry path. Co-Authored-By: ForgeCode <noreply@forgecode.dev>
1 parent 6987017 commit 73ae220

2 files changed

Lines changed: 47 additions & 1 deletion

File tree

crates/forge_repo/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ tokio.workspace = true
2121
tokio-stream.workspace = true
2222
tracing.workspace = true
2323
reqwest.workspace = true
24+
hyper = { version = "1" }
2425
url.workspace = true
2526
bytes.workspace = true
2627
strum.workspace = true

crates/forge_repo/src/provider/retry.rs

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,32 @@ fn is_empty_error(error: &anyhow::Error) -> bool {
9595
fn is_req_transport_error(error: &anyhow::Error) -> bool {
9696
error
9797
.downcast_ref::<reqwest::Error>()
98-
.is_some_and(|e| e.is_timeout() || e.is_connect())
98+
.is_some_and(|e| e.is_timeout() || e.is_connect() || is_incomplete_message(e))
99+
}
100+
101+
/// Returns true if the reqwest error was caused by a peer closing the
102+
/// connection before the response was complete. This covers:
103+
/// - TLS peers that omit the close_notify alert (rustls surfaces this as
104+
/// `io::Error(UnexpectedEof)`)
105+
/// - HTTP peers that drop the TCP connection mid-response (hyper surfaces
106+
/// this as `hyper::Error::is_incomplete_message()`)
107+
fn is_incomplete_message(error: &reqwest::Error) -> bool {
108+
use std::error::Error as StdError;
109+
let mut source: Option<&dyn StdError> = error.source();
110+
while let Some(err) = source {
111+
if let Some(io_err) = err.downcast_ref::<std::io::Error>() {
112+
if io_err.kind() == std::io::ErrorKind::UnexpectedEof {
113+
return true;
114+
}
115+
}
116+
if let Some(hyper_err) = err.downcast_ref::<hyper::Error>() {
117+
if hyper_err.is_incomplete_message() {
118+
return true;
119+
}
120+
}
121+
source = err.source();
122+
}
123+
false
99124
}
100125

101126
fn is_event_transport_error(error: &anyhow::Error) -> bool {
@@ -293,4 +318,24 @@ mod tests {
293318
assert!(get_req_status_code(&error).is_none());
294319
assert!(get_event_req_status_code(&error).is_none());
295320
}
321+
322+
#[tokio::test]
323+
async fn test_incomplete_message_is_retryable() {
324+
use tokio::net::TcpListener;
325+
326+
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
327+
let addr = listener.local_addr().unwrap();
328+
tokio::spawn(async move {
329+
let (_socket, _) = listener.accept().await.unwrap();
330+
});
331+
332+
let req_err = reqwest::Client::new()
333+
.get(format!("http://{addr}"))
334+
.send()
335+
.await
336+
.unwrap_err();
337+
338+
let retry_config = fixture_retry_config(vec![]);
339+
assert!(is_retryable(into_retry(req_err.into(), &retry_config)));
340+
}
296341
}

0 commit comments

Comments
 (0)