Exclude tokens with priority 0 from the simultaneous-requests rate limit
This commit is contained in:
parent
c16d70a24d
commit
6c0e60135d
|
@ -22,7 +22,7 @@ class PriorityQueue:
|
|||
event = DataEvent()
|
||||
with self._cv:
|
||||
# Check if the IP is already in the dictionary and if it has reached the limit
|
||||
if item[1] in self._ip_count and self._ip_count[item[1]] >= opts.ip_in_queue_max:
|
||||
if item[1] in self._ip_count and self._ip_count[item[1]] >= opts.ip_in_queue_max and priority != 0:
|
||||
return None # reject the request
|
||||
heapq.heappush(self._queue, (-priority, self._index, item, event))
|
||||
self._index += 1
|
||||
|
|
|
@ -51,7 +51,7 @@ def generate():
|
|||
else:
|
||||
print(f'Token {token} was given priority {priority}.')
|
||||
|
||||
if not redis.sismember('processing_ips', client_ip):
|
||||
if not redis.sismember('processing_ips', client_ip) or priority == 0:
|
||||
event = priority_queue.put((request_json_body, client_ip, token, parameters), priority)
|
||||
else:
|
||||
event = None
|
||||
|
@ -69,8 +69,6 @@ def generate():
|
|||
else:
|
||||
raise Exception
|
||||
return jsonify({
|
||||
# 'code': 429,
|
||||
# 'error': f'no more than {opts.ip_in_queue_max} simultaneous requests per IP',
|
||||
**response_json_body
|
||||
}), 200
|
||||
|
||||
|
|
|
@ -95,6 +95,7 @@ def generate_stats():
|
|||
'queue_size': opts.concurrent_gens,
|
||||
'model': model_name,
|
||||
'mode': opts.mode,
|
||||
'simultaneous_requests': opts.ip_in_queue_max,
|
||||
},
|
||||
'keys': {
|
||||
'openaiKeys': '∞',
|
||||
|
|
Reference in New Issue