Adjusts Claude rate limit handling to retry more aggressively
This commit is contained in:
parent
efa1b03570
commit
c749e2d57d
|
@ -40,10 +40,16 @@ export interface AnthropicKey extends Key {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* We don't get rate limit headers from Anthropic so if we get a 429, we just
|
* Upon being rate limited, a key will be locked out for this many milliseconds
|
||||||
* lock out the key for a few seconds
|
* while we wait for other concurrent requests to finish.
|
||||||
*/
|
*/
|
||||||
const RATE_LIMIT_LOCKOUT = 5000;
|
const RATE_LIMIT_LOCKOUT = 2000;
|
||||||
|
/**
|
||||||
|
* Upon assigning a key, we will wait this many milliseconds before allowing it
|
||||||
|
* to be used again. This is to prevent the queue from flooding a key with too
|
||||||
|
* many requests while we wait to learn whether previous ones succeeded.
|
||||||
|
*/
|
||||||
|
const KEY_REUSE_DELAY = 500;
|
||||||
|
|
||||||
export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
readonly service = "anthropic";
|
readonly service = "anthropic";
|
||||||
|
@ -129,7 +135,7 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
// Intended to throttle the queue processor as otherwise it will just
|
// Intended to throttle the queue processor as otherwise it will just
|
||||||
// flood the API with requests and we want to wait a sec to see if we're
|
// flood the API with requests and we want to wait a sec to see if we're
|
||||||
// going to get a rate limit error on this key.
|
// going to get a rate limit error on this key.
|
||||||
selectedKey.rateLimitedUntil = now + 1000;
|
selectedKey.rateLimitedUntil = now + KEY_REUSE_DELAY;
|
||||||
return { ...selectedKey };
|
return { ...selectedKey };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -183,15 +189,9 @@ export class AnthropicKeyProvider implements KeyProvider<AnthropicKey> {
|
||||||
/**
|
/**
|
||||||
* This is called when we receive a 429, which means there are already five
|
* This is called when we receive a 429, which means there are already five
|
||||||
* concurrent requests running on this key. We don't have any information on
|
* concurrent requests running on this key. We don't have any information on
|
||||||
* when these requests will resolve so all we can do is wait a bit and try
|
* when these requests will resolve, so all we can do is wait a bit and try
|
||||||
* again.
|
* again. We will lock the key for 2 seconds after getting a 429 before
|
||||||
* We will lock the key for 10 seconds, which should let a few of the other
|
* retrying in order to give the other requests a chance to finish.
|
||||||
* generations finish. This is an arbitrary number but the goal is to balance
|
|
||||||
* between not hammering the API with requests and not locking out a key that
|
|
||||||
* is actually available.
|
|
||||||
* TODO: Try to assign requests to slots on each key so we have an idea of how
|
|
||||||
* long each slot has been running and can make a more informed decision on
|
|
||||||
* how long to lock the key.
|
|
||||||
*/
|
*/
|
||||||
public markRateLimited(keyHash: string) {
|
public markRateLimited(keyHash: string) {
|
||||||
this.log.warn({ key: keyHash }, "Key rate limited");
|
this.log.warn({ key: keyHash }, "Key rate limited");
|
||||||
|
|
Loading…
Reference in New Issue