|
|
@@ -58,13 +58,19 @@ func (auth *ClientAuth) Apply(req *http.Request) {
|
|
|
}
|
|
|
|
|
|
//--------------------------------------------------------------------------
|
|
|
-// Rate Limited Error
|
|
|
+// Rate Limit Options
|
|
|
//--------------------------------------------------------------------------
|
|
|
|
|
|
// MaxRetryAfterDuration is the maximum amount of time we should ever wait
|
|
|
// during a retry. This is to prevent starvation on the request threads
|
|
|
const MaxRetryAfterDuration = 10 * time.Second
|
|
|
|
|
|
+// RateLimitRetryOpts contains retry options
|
|
|
+type RateLimitRetryOpts struct {
|
|
|
+ MaxRetries int
|
|
|
+ DefaultRetryWait time.Duration
|
|
|
+}
|
|
|
+
|
|
|
// RateLimitResponseStatus contains the status of the rate limited retries
|
|
|
type RateLimitResponseStatus struct {
|
|
|
RetriesRemaining int
|
|
|
@@ -105,14 +111,14 @@ func (rlre *RateLimitedResponseError) Error() string {
|
|
|
// RateLimitedPrometheusClient is a prometheus client which limits the total number of
|
|
|
// concurrent outbound requests allowed at a given moment.
|
|
|
type RateLimitedPrometheusClient struct {
|
|
|
- id string
|
|
|
- client prometheus.Client
|
|
|
- auth *ClientAuth
|
|
|
- queue collections.BlockingQueue
|
|
|
- decorator QueryParamsDecorator
|
|
|
- retryOnRateLimit bool
|
|
|
- outbound *atomic.AtomicInt32
|
|
|
- fileLogger *golog.Logger
|
|
|
+ id string
|
|
|
+ client prometheus.Client
|
|
|
+ auth *ClientAuth
|
|
|
+ queue collections.BlockingQueue
|
|
|
+ decorator QueryParamsDecorator
|
|
|
+ rateLimitRetry *RateLimitRetryOpts
|
|
|
+ outbound *atomic.AtomicInt32
|
|
|
+ fileLogger *golog.Logger
|
|
|
}
|
|
|
|
|
|
// requestCounter is used to determine if the prometheus client keeps track of
|
|
|
@@ -130,7 +136,7 @@ func NewRateLimitedClient(
|
|
|
maxConcurrency int,
|
|
|
auth *ClientAuth,
|
|
|
decorator QueryParamsDecorator,
|
|
|
- retryOnRateLimit bool,
|
|
|
+ rateLimitRetryOpts *RateLimitRetryOpts,
|
|
|
queryLogFile string) (prometheus.Client, error) {
|
|
|
|
|
|
queue := collections.NewBlockingQueue()
|
|
|
@@ -161,14 +167,14 @@ func NewRateLimitedClient(
|
|
|
}
|
|
|
|
|
|
rlpc := &RateLimitedPrometheusClient{
|
|
|
- id: id,
|
|
|
- client: client,
|
|
|
- queue: queue,
|
|
|
- decorator: decorator,
|
|
|
- retryOnRateLimit: retryOnRateLimit,
|
|
|
- outbound: outbound,
|
|
|
- auth: auth,
|
|
|
- fileLogger: logger,
|
|
|
+ id: id,
|
|
|
+ client: client,
|
|
|
+ queue: queue,
|
|
|
+ decorator: decorator,
|
|
|
+ rateLimitRetry: rateLimitRetryOpts,
|
|
|
+ outbound: outbound,
|
|
|
+ auth: auth,
|
|
|
+ fileLogger: logger,
|
|
|
}
|
|
|
|
|
|
// Start concurrent request processing
|
|
|
@@ -224,7 +230,8 @@ type workResponse struct {
|
|
|
|
|
|
// worker is used as a consumer goroutine to pull workRequest from the blocking queue and execute them
|
|
|
func (rlpc *RateLimitedPrometheusClient) worker() {
|
|
|
- retryRateLimit := rlpc.retryOnRateLimit
|
|
|
+ retryOpts := rlpc.rateLimitRetry
|
|
|
+ retryRateLimit := retryOpts != nil
|
|
|
|
|
|
for {
|
|
|
// blocks until there is an item available
|
|
|
@@ -262,15 +269,17 @@ func (rlpc *RateLimitedPrometheusClient) worker() {
|
|
|
// * If we couldn't determine how long to wait for a retry, use 1 second by default
|
|
|
if retryRateLimit {
|
|
|
var status []*RateLimitResponseStatus
|
|
|
- var retries int = 5
|
|
|
+ var retries int = retryOpts.MaxRetries
|
|
|
+ var defaultWait time.Duration = retryOpts.DefaultRetryWait
|
|
|
|
|
|
for httputil.IsRateLimited(res, body) && retries > 0 {
|
|
|
+ // calculate amount of time to wait before retry, in the event the default wait is used,
|
|
|
+ // an exponential backoff is applied based on the number of times we've retried.
|
|
|
+ retryAfter := httputil.RateLimitedRetryFor(res, defaultWait, retryOpts.MaxRetries-retries)
|
|
|
retries--
|
|
|
|
|
|
- // calculate amount of time to wait before retry, default to 1s
|
|
|
- retryAfter := httputil.RateLimitedRetryFor(res, time.Second)
|
|
|
status = append(status, &RateLimitResponseStatus{RetriesRemaining: retries, WaitTime: retryAfter})
|
|
|
- log.DedupedInfof(50, "Rate Limited Prometheus Request. Waiting for: %.2f seconds. Retries Remaining: %d", retryAfter.Seconds(), retries)
|
|
|
+ log.DedupedInfof(50, "Rate Limited Prometheus Request. Waiting for: %d ms. Retries Remaining: %d", retryAfter.Milliseconds(), retries)
|
|
|
|
|
|
// To prevent total starvation of request threads, hard limit wait time to 10s. We also want quota limits/throttles
|
|
|
// to eventually pass through as an error. For example, if some quota is reached with 10 days left, we clearly
|
|
|
@@ -340,14 +349,14 @@ func (rlpc *RateLimitedPrometheusClient) Do(ctx context.Context, req *http.Reque
|
|
|
|
|
|
// PrometheusClientConfig contains all configurable options for creating a new prometheus client
|
|
|
type PrometheusClientConfig struct {
|
|
|
- Timeout time.Duration
|
|
|
- KeepAlive time.Duration
|
|
|
- TLSHandshakeTimeout time.Duration
|
|
|
- TLSInsecureSkipVerify bool
|
|
|
- RetryOnRateLimitResponse bool
|
|
|
- Auth *ClientAuth
|
|
|
- QueryConcurrency int
|
|
|
- QueryLogFile string
|
|
|
+ Timeout time.Duration
|
|
|
+ KeepAlive time.Duration
|
|
|
+ TLSHandshakeTimeout time.Duration
|
|
|
+ TLSInsecureSkipVerify bool
|
|
|
+ RateLimitRetryOpts *RateLimitRetryOpts
|
|
|
+ Auth *ClientAuth
|
|
|
+ QueryConcurrency int
|
|
|
+ QueryLogFile string
|
|
|
}
|
|
|
|
|
|
// NewPrometheusClient creates a new rate limited client which limits by outbound concurrent requests.
|
|
|
@@ -379,7 +388,7 @@ func NewPrometheusClient(address string, config *PrometheusClientConfig) (promet
|
|
|
config.QueryConcurrency,
|
|
|
config.Auth,
|
|
|
nil,
|
|
|
- config.RetryOnRateLimitResponse,
|
|
|
+ config.RateLimitRetryOpts,
|
|
|
config.QueryLogFile,
|
|
|
)
|
|
|
}
|