From 74a0c53c7fb586a6575183aa28eb9de9d5c76a82 Mon Sep 17 00:00:00 2001 From: Bharat Date: Tue, 16 Jun 2026 14:16:12 +0000 Subject: [PATCH] INF-1671/fix: clamp and floor Retry-After in backoffDelay Treat the Retry-After header as a lower bound clamped to maxBackoff rather than an override. A hostile or misconfigured Retry-After: 999999 no longer parks the CLI for days, and Retry-After: 0 after a rate-limit reset no longer collapses into a zero-delay retry storm. Mirrors the twoadm-cli httpx hardening so the two CLIs keep the same retry semantics (INF-1314). --- internal/httpx/backoff_test.go | 31 +++++++++++++++++++++++ internal/httpx/client.go | 46 ++++++++++++++++++++++++++++------ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/internal/httpx/backoff_test.go b/internal/httpx/backoff_test.go index 873a5fe..607f860 100644 --- a/internal/httpx/backoff_test.go +++ b/internal/httpx/backoff_test.go @@ -34,3 +34,34 @@ func TestBackoffDelayBadRetryAfterFallsBack(t *testing.T) { t.Errorf("garbage Retry-After should fall back to backoff, got %v", d) } } + +func TestBackoffDelayClampsLargeRetryAfter(t *testing.T) { + // A hostile or misconfigured upstream must not be able to park the CLI for + // days: the hint is clamped to maxBackoff. + resp := &http.Response{Header: http.Header{"Retry-After": []string{"999999"}}} + if d := backoffDelay(0, resp); d != maxBackoff { + t.Errorf("Retry-After: 999999 → %v, want clamp to %v", d, maxBackoff) + } +} + +func TestBackoffDelayFloorsZeroRetryAfter(t *testing.T) { + // Retry-After: 0 (e.g. a fleet-wide rate-limit reset) must not collapse + // into a zero-delay retry storm; it floors at the jittered backoff. + resp := &http.Response{Header: http.Header{"Retry-After": []string{"0"}}} + d := backoffDelay(0, resp) + if d < baseBackoff { + t.Errorf("Retry-After: 0 → %v, want at least baseBackoff %v", d, baseBackoff) + } + if d > maxBackoff+maxBackoff/4 { + t.Errorf("Retry-After: 0 → %v exceeds cap+jitter window", d) + } +} + +func TestBackoffDelayRetryAfterIsLowerBoundNotCeiling(t *testing.T) { + // At a high attempt our own backoff already sits at the cap; a small + // Retry-After must not shrink it below that. + resp := &http.Response{Header: http.Header{"Retry-After": []string{"1"}}} + if d := backoffDelay(6, resp); d < maxBackoff { + t.Errorf("backoff at attempt 6 should be >= %v despite Retry-After: 1, got %v", maxBackoff, d) + } +} diff --git a/internal/httpx/client.go b/internal/httpx/client.go index fca1dbf..2246744 100644 --- a/internal/httpx/client.go +++ b/internal/httpx/client.go @@ -183,16 +183,24 @@ func shouldRetry(resp *http.Response, err error) bool { return false } -// backoffDelay honours `Retry-After` when set, otherwise applies exponential -// backoff with jitter so a thundering herd doesn't synchronise. +// backoffDelay applies exponential backoff with jitter so a thundering herd +// doesn't synchronise, and treats `Retry-After` as a lower bound rather than an +// override: the server hint is honoured only when it exceeds our own backoff, +// and is clamped to maxBackoff. This keeps `Retry-After: 0` (e.g. a fleet-wide +// rate-limit reset) from collapsing into a zero-delay retry storm, and stops a +// hostile or misconfigured `Retry-After: 999999` from parking the CLI for days. +// Mirrors twoadm-cli internal/httpx (see INF-1314). func backoffDelay(attempt int, resp *http.Response) time.Duration { - if resp != nil { - if v := resp.Header.Get("Retry-After"); v != "" { - if secs, err := strconv.Atoi(strings.TrimSpace(v)); err == nil && secs >= 0 { - return time.Duration(secs) * time.Second - } - } + d := jitteredBackoff(attempt) + if hint := retryAfter(resp); hint > d { + d = hint } + return d +} + +// jitteredBackoff returns the exponential backoff for `attempt` (0-based) with +// [0, 25%) additive jitter, capped at maxBackoff. +func jitteredBackoff(attempt int) time.Duration { // Clamp attempt so the shift below can't overflow time.Duration on // pathological MaxRetries values. baseBackoff << 6 = 16s, already // past the cap, so any attempt ≥ 6 lands on maxBackoff anyway. @@ -209,3 +217,25 @@ func backoffDelay(attempt int, resp *http.Response) time.Duration { } return d + time.Duration(rand.Int64N(jitter)) } + +// retryAfter parses the `Retry-After` header (seconds form), clamped to +// maxBackoff. Returns 0 when the header is absent, negative, or unparseable so +// the caller falls back to jittered exponential backoff. +func retryAfter(resp *http.Response) time.Duration { + if resp == nil { + return 0 + } + v := strings.TrimSpace(resp.Header.Get("Retry-After")) + if v == "" { + return 0 + } + secs, err := strconv.Atoi(v) + if err != nil || secs < 0 { + return 0 + } + d := time.Duration(secs) * time.Second + if d > maxBackoff { + return maxBackoff + } + return d +}