Skip to content

Commit c80f03d

Browse files
Implements the updated Retry Behavior behind a feature flag
1 parent d019034 commit c80f03d

13 files changed

Lines changed: 1327 additions & 39 deletions
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
{
2+
"core": {
3+
"changeLogMessages": [
4+
"Implements the updated Retry Behavior behind a feature flag (AWS_NEW_RETRIES_2026)."
5+
],
6+
"type": "minor",
7+
"updateMinimum": true,
8+
"backwardIncompatibilitiesToIgnore": [
9+
"Amazon.Runtime.Internal.CapacityManager/MethodRemoved",
10+
"Amazon.Runtime.Internal.StandardRetryPolicy/MethodRemoved"
11+
]
12+
},
13+
"services": [
14+
{
15+
"serviceName": "DynamoDBv2",
16+
"type": "patch",
17+
"changeLogMessages": [
18+
"Remove max-retries from DynamoDB service and depend on values applied in the `AWSSDK.Core` package."
19+
]
20+
}
21+
]
22+
}

sdk/src/Core/Amazon.Runtime/CapacityManager.cs

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,12 @@ public enum CapacityType
4141
/// <summary>
4242
/// The timeout capacity type uses the timeout capacity amount.
4343
/// </summary>
44-
Timeout
44+
Timeout,
45+
/// <summary>
46+
/// The throttling capacity type uses the throttling retry cost amount.
47+
/// Used when the new retry behavior (SEP 2.1) is enabled.
48+
/// </summary>
49+
Throttling
4550
}
4651

4752

@@ -63,17 +68,21 @@ protected virtual void Dispose(bool disposing)
6368
}
6469
}
6570

66-
public CapacityManager(int throttleRetryCount, int throttleRetryCost, int throttleCost)
67-
: this(throttleRetryCount, throttleRetryCost, throttleCost, throttleRetryCost)
68-
{
69-
}
70-
71-
public CapacityManager(int throttleRetryCount, int throttleRetryCost, int throttleCost, int timeoutRetryCost)
71+
/// <summary>
72+
/// Constructor for CapacityManager.
73+
/// </summary>
74+
/// <param name="initialRetryTokens">The initial and maximum number of retry tokens.</param>
75+
/// <param name="retryCost">The cost of a non-throttling retry.</param>
76+
/// <param name="noRetryIncrement">The capacity to add on a successful non-retry request.</param>
77+
/// <param name="timeoutRetryCost">The cost of a timeout retry.</param>
78+
/// <param name="throttlingRetryCost">The cost of a throttling retry (0 if not applicable).</param>
79+
public CapacityManager(int initialRetryTokens, int retryCost, int noRetryIncrement, int timeoutRetryCost, int throttlingRetryCost)
7280
{
73-
retryCost = throttleRetryCost;
74-
initialRetryTokens = throttleRetryCount;
75-
noRetryIncrement = throttleCost;
81+
this.retryCost = retryCost;
82+
this.initialRetryTokens = initialRetryTokens;
83+
this.noRetryIncrement = noRetryIncrement;
7684
this.timeoutRetryCost = timeoutRetryCost;
85+
this.throttlingRetryCost = throttlingRetryCost;
7786
}
7887

7988
/// <summary>
@@ -92,7 +101,19 @@ public bool TryAcquireCapacity(RetryCapacity retryCapacity)
92101
/// <param name="capacityType">Specifies what capacity type cost to use for obtaining capacity</param>
93102
public bool TryAcquireCapacity(RetryCapacity retryCapacity, CapacityType capacityType)
94103
{
95-
var capacityCost = capacityType == CapacityType.Timeout ? timeoutRetryCost : retryCost;
104+
int capacityCost;
105+
switch (capacityType)
106+
{
107+
case CapacityType.Timeout:
108+
capacityCost = timeoutRetryCost;
109+
break;
110+
case CapacityType.Throttling:
111+
capacityCost = throttlingRetryCost;
112+
break;
113+
default:
114+
capacityCost = retryCost;
115+
break;
116+
}
96117
if (capacityCost < 0)
97118
{
98119
return false;
@@ -127,6 +148,9 @@ public void ReleaseCapacity(CapacityType capacityType, RetryCapacity retryCapaci
127148
case CapacityType.Timeout:
128149
ReleaseCapacity(timeoutRetryCost, retryCapacity);
129150
break;
151+
case CapacityType.Throttling:
152+
ReleaseCapacity(throttlingRetryCost, retryCapacity);
153+
break;
130154
case CapacityType.Increment:
131155
ReleaseCapacity(noRetryIncrement, retryCapacity);
132156
break;
@@ -163,6 +187,10 @@ public RetryCapacity GetRetryCapacity(string serviceURL)
163187
// legacy retry modes and 10 for all other retry modes.
164188
private readonly int timeoutRetryCost;
165189

190+
// This parameter sets the cost of making a retry call when the error is a throttling error.
191+
// Used when the new retry behavior (SEP 2.1) is enabled. The default value is 5.
192+
private readonly int throttlingRetryCost;
193+
166194
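// Initial and maximum number of retry tokens in a bucket; this value is used directly as the bucket capacity.
// NOTE(review): the values quoted previously (100 for legacy retry mode, 500 for all other retry modes) look stale - DefaultRetryPolicy now passes 500; confirm the other retry modes.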
167195
private readonly int initialRetryTokens;
168196

@@ -199,7 +227,7 @@ private RetryCapacity AddNewRetryCapacity(string serviceURL)
199227
_rwlock.EnterWriteLock();
200228
try
201229
{
202-
retryCapacity = new RetryCapacity(retryCost * initialRetryTokens);
230+
retryCapacity = new RetryCapacity(initialRetryTokens);
203231
_serviceUrlToCapacityMap.Add(serviceURL, retryCapacity);
204232
return retryCapacity;
205233
}

sdk/src/Core/Amazon.Runtime/Pipeline/RetryHandler/DefaultRetryPolicy.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public partial class DefaultRetryPolicy : RetryPolicy
3131
//The status code returned from a service request when an invalid endpoint is used.
3232
private const int INVALID_ENDPOINT_EXCEPTION_STATUSCODE = 421;
3333
//Holds on to the singleton instance.
34-
private static readonly CapacityManager _capacityManagerInstance = new CapacityManager(throttleRetryCount: 100, throttleRetryCost: 5, throttleCost: 1);
34+
private static readonly CapacityManager _capacityManagerInstance = new CapacityManager(initialRetryTokens: 500, retryCost: 5, noRetryIncrement: 1, timeoutRetryCost: 5, throttlingRetryCost: 0);
3535

3636
private static readonly HashSet<string> _netStandardRetryErrorMessages = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
3737
{

sdk/src/Core/Amazon.Runtime/Pipeline/RetryHandler/RetryHandler.cs

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,21 @@ public override void InvokeSync(IExecutionContext executionContext)
121121
shouldRetry = this.RetryPolicy.Retry(executionContext, exception);
122122
if (!shouldRetry)
123123
{
124+
// SEP 2.1: Long-polling operations must always back off when the error is
125+
// retryable and retry quota is exhausted (but NOT when max attempts is reached).
126+
// The RetryLimitReached check distinguishes quota exhaustion from max attempts.
127+
if (IsLongPollingOperation(executionContext)
128+
&& requestContext.IsLastExceptionRetryable
129+
&& !this.RetryPolicy.RetryLimitReached(executionContext))
130+
{
131+
// Temporarily increment Retries so the backoff formula computes the
132+
// delay for the correct attempt index (i=1 for first failure), then
133+
// restore the original value so LogForError reports accurately.
134+
requestContext.Retries++;
135+
this.RetryPolicy.WaitBeforeRetry(executionContext);
136+
requestContext.Retries--;
137+
}
138+
124139
LogForError(requestContext, exception);
125140
throw;
126141
}
@@ -215,6 +230,21 @@ public override async System.Threading.Tasks.Task<T> InvokeAsync<T>(IExecutionCo
215230
shouldRetry = await this.RetryPolicy.RetryAsync(executionContext, capturedException.SourceException).ConfigureAwait(false);
216231
if (!shouldRetry)
217232
{
233+
// SEP 2.1: Long-polling operations must always back off when the error is
234+
// retryable and retry quota is exhausted (but NOT when max attempts is reached).
235+
// The RetryLimitReached check distinguishes quota exhaustion from max attempts.
236+
if (IsLongPollingOperation(executionContext)
237+
&& requestContext.IsLastExceptionRetryable
238+
&& !this.RetryPolicy.RetryLimitReached(executionContext))
239+
{
240+
// Temporarily increment Retries so the backoff formula computes the
241+
// delay for the correct attempt index (i=1 for first failure), then
242+
// restore the original value so LogForError reports accurately.
243+
requestContext.Retries++;
244+
await RetryPolicy.WaitBeforeRetryAsync(executionContext).ConfigureAwait(false);
245+
requestContext.Retries--;
246+
}
247+
218248
LogForError(requestContext, capturedException.SourceException);
219249
capturedException.Throw();
220250
}
@@ -349,6 +379,35 @@ private bool ShouldRetryForStaleConnection(
349379
return false;
350380
}
351381

/// <summary>
/// Reports whether the request being executed is one of the known long-polling operations.
/// Such operations should still back off on a retryable error, even when the retry quota
/// is exhausted. Always returns false unless <c>RetryPolicy.UseNewRetries2026</c> is set.
/// </summary>
/// <param name="executionContext">Execution context of the request that just failed.</param>
/// <returns>True when the service/operation pair is a recognised long-polling call; otherwise false.</returns>
private static bool IsLongPollingOperation(IExecutionContext executionContext)
{
    if (!RetryPolicy.UseNewRetries2026)
    {
        return false;
    }

    // TODO: Check longPoll trait from model when available in C2J models.
    // Until the trait is available, use hard-coded service/operation combinations.
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    var requestContext = executionContext.RequestContext;
    var service = requestContext.ClientConfig?.ServiceId;
    var operation = AWSSDKUtils.ExtractOperationName(requestContext.RequestName);

    // The service identifiers are mutually exclusive, so each branch can answer directly.
    if (string.Equals(service, "SQS", ignoreCase))
    {
        return string.Equals(operation, "ReceiveMessage", ignoreCase);
    }

    if (string.Equals(service, "SFN", ignoreCase))
    {
        return string.Equals(operation, "GetActivityTask", ignoreCase);
    }

    if (string.Equals(service, "SWF", ignoreCase))
    {
        return string.Equals(operation, "PollForActivityTask", ignoreCase)
            || string.Equals(operation, "PollForDecisionTask", ignoreCase);
    }

    return false;
}
410+
352411
private void SetRetryHeaders(IRequestContext requestContext)
353412
{
354413
var request = requestContext.Request;

sdk/src/Core/Amazon.Runtime/Pipeline/RetryHandler/RetryPolicy.cs

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,16 @@ namespace Amazon.Runtime
3434
/// </summary>
3535
public abstract partial class RetryPolicy
3636
{
37+
/// <summary>
38+
/// Temporary feature flag for updated retry behavior improvements including
39+
/// revised backoff timing, updated retry quota costs, and other enhancements.
40+
/// Enabled by setting the AWS_NEW_RETRIES_2026 environment variable to "true".
41+
/// Defaults to false. This flag will be removed at end of 2026 when the new
42+
/// behavior becomes the default.
43+
/// </summary>
44+
internal static bool UseNewRetries2026 { get; set; } =
45+
string.Equals(Environment.GetEnvironmentVariable("AWS_NEW_RETRIES_2026"), "true", StringComparison.OrdinalIgnoreCase);
46+
3747
/// <summary>
3848
/// Maximum number of retries to be performed.
3949
/// This does not count the initial request.
@@ -146,8 +156,17 @@ public bool Retry(IExecutionContext executionContext, Exception exception)
146156
return false;
147157
}
148158

149-
executionContext.RequestContext.LastCapacityType = IsServiceTimeoutError(exception) ?
150-
CapacityManager.CapacityType.Timeout : CapacityManager.CapacityType.Retry;
159+
if (UseNewRetries2026)
160+
{
161+
executionContext.RequestContext.LastCapacityType = IsThrottlingError(exception) ?
162+
CapacityManager.CapacityType.Throttling : CapacityManager.CapacityType.Retry;
163+
StoreRetryAfterHeader(executionContext, exception);
164+
}
165+
else
166+
{
167+
executionContext.RequestContext.LastCapacityType = IsServiceTimeoutError(exception) ?
168+
CapacityManager.CapacityType.Timeout : CapacityManager.CapacityType.Retry;
169+
}
151170
return OnRetry(executionContext, isClockSkewError, IsThrottlingError(exception));
152171
}
153172
}
@@ -623,6 +642,39 @@ private static bool TryParseExceptionMessage(AmazonServiceException ase, out Dat
623642

624643
#endregion
625644

645+
/// <summary>
646+
/// Context attribute key for storing the x-amz-retry-after header value (in milliseconds).
647+
/// </summary>
648+
protected const string RetryAfterContextKey = "RetryAfterMs";
649+
/// <summary>
/// Extracts the x-amz-retry-after header from the error response and stores it in the request
/// context's ContextAttributes under <see cref="RetryAfterContextKey"/>.
/// The header value is an integer representing milliseconds. Any value stored by a previous
/// call is removed first, so the attribute is only present when the current error response
/// carried a valid, non-negative header value.
/// </summary>
/// <param name="executionContext">Execution context whose request context receives the parsed value.</param>
/// <param name="exception">
/// The exception raised by the failed attempt. Only <c>AmazonServiceException</c> instances are
/// inspected; for anything else nothing is stored.
/// </param>
private void StoreRetryAfterHeader(IExecutionContext executionContext, Exception exception)
{
    var contextAttributes = executionContext.RequestContext.ContextAttributes;

    // Remove any previously stored value so a stale delay is never reused for this attempt.
    contextAttributes.Remove(RetryAfterContextKey);

    var webData = GetWebData(exception as AmazonServiceException);
    if (webData == null)
    {
        return;
    }

    var retryAfterValue = webData.GetHeaderValue("x-amz-retry-after");
    if (string.IsNullOrEmpty(retryAfterValue))
    {
        return;
    }

    // The header is machine-generated, so parse it with the invariant culture rather than the
    // current thread culture; NumberStyles.Integer matches the style of the parameterless overload.
    var parsed = int.TryParse(
        retryAfterValue,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out var retryAfterMs);

    if (parsed && retryAfterMs >= 0)
    {
        contextAttributes[RetryAfterContextKey] = retryAfterMs;
    }
    else
    {
        Logger?.DebugFormat("Invalid x-amz-retry-after header value '{0}', falling back to exponential backoff.", retryAfterValue);
    }
}
677+
626678
private static IWebResponseData GetWebData(AmazonServiceException ase)
627679
{
628680
if (ase != null)

0 commit comments

Comments
 (0)