Exclude tokens with priority 0 from the simultaneous-requests rate limit

This commit is contained in:
Cyberes 2023-08-28 00:03:25 -06:00
parent c16d70a24d
commit 6c0e60135d
3 changed files with 3 additions and 4 deletions

View File

@@ -22,7 +22,7 @@ class PriorityQueue:
event = DataEvent()
with self._cv:
# Check if the IP is already in the dictionary and if it has reached the limit
- if item[1] in self._ip_count and self._ip_count[item[1]] >= opts.ip_in_queue_max:
+ if item[1] in self._ip_count and self._ip_count[item[1]] >= opts.ip_in_queue_max and priority != 0:
return None # reject the request
heapq.heappush(self._queue, (-priority, self._index, item, event))
self._index += 1

View File

@@ -51,7 +51,7 @@ def generate():
else:
print(f'Token {token} was given priority {priority}.')
- if not redis.sismember('processing_ips', client_ip):
+ if not redis.sismember('processing_ips', client_ip) or priority == 0:
event = priority_queue.put((request_json_body, client_ip, token, parameters), priority)
else:
event = None
@@ -69,8 +69,6 @@ def generate():
else:
raise Exception
return jsonify({
- # 'code': 429,
- # 'error': f'no more than {opts.ip_in_queue_max} simultaneous requests per IP',
**response_json_body
}), 200

View File

@@ -95,6 +95,7 @@ def generate_stats():
'queue_size': opts.concurrent_gens,
'model': model_name,
'mode': opts.mode,
+ 'simultaneous_requests': opts.ip_in_queue_max,
},
'keys': {
'openaiKeys': '',