This repository has been archived on 2024-10-27. You can view files and clone it, but cannot push or open issues or pull requests.
local-llm-server/llm_server/routes/queue.py

99 lines
2.9 KiB
Python
Raw Normal View History

import json
import pickle
import time
from uuid import uuid4
from redis import Redis
2023-08-23 20:12:38 -06:00
from llm_server import opts
from llm_server.custom_redis import redis
2023-09-28 03:44:30 -06:00
def increment_ip_count(client_ip: str, redis_key):
    """Add one to the counter stored for ``client_ip`` in the hash ``redis_key``.

    :param client_ip: hash field whose counter is incremented.
    :param redis_key: name of the Redis hash holding the per-IP counters.
    """
    redis.hincrby(redis_key, client_ip, 1)
2023-09-28 03:44:30 -06:00
def decrement_ip_count(client_ip: str, redis_key):
    """Subtract one from the counter stored for ``client_ip`` in ``redis_key``.

    The field is removed from the hash once its counter is zero or negative,
    so the hash only keeps entries with a positive count.

    :param client_ip: hash field whose counter is decremented.
    :param redis_key: name of the Redis hash holding the per-IP counters.
    """
    remaining = redis.hincrby(redis_key, client_ip, -1)
    if remaining > 0:
        return
    # Nothing left for this IP: drop the field instead of keeping a 0 entry.
    redis.hdel(redis_key, client_ip)
2023-08-23 20:12:38 -06:00
class RedisPriorityQueue:
    """Priority queue of pending requests backed by a Redis sorted set.

    Entries are stored in the ``queue`` sorted set (Redis db 15). The number
    of queued entries per client IP is tracked in the ``queued_ip_count``
    hash so that ``put`` can enforce ``opts.simultaneous_requests_per_ip``.
    """

    def __init__(self):
        self.redis = Redis(host='localhost', port=6379, db=15)
        self.pubsub = self.redis.pubsub()
        self.pubsub.subscribe('events')

    def put(self, item, priority):
        """Queue ``item`` and return the ``DataEvent`` that will carry its result.

        :param item: JSON-serialisable sequence; ``item[1]`` is used as the
            client IP for per-IP accounting.
        :param priority: numeric priority. The entry is stored with score
            ``-priority`` and ``get`` pops the lowest score first. A priority
            of ``0`` is exempt from the per-IP limit.
        :return: the ``DataEvent`` for the queued entry, or ``None`` when the
            request is rejected because the IP is at its limit.
        """
        client_ip = item[1]

        # Check whether this IP has already reached its limit of queued requests.
        ip_count = self.get_queued_ip_count(client_ip)
        if ip_count and ip_count >= opts.simultaneous_requests_per_ip and priority != 0:
            print(f'Rejecting request from {item[1]} - {ip_count} requests in progress.')
            return None  # reject the request

        # Create the event only for accepted requests (a DataEvent opens its
        # own Redis subscription), but before the entry becomes visible in the
        # queue so the subscription exists when a consumer publishes to it.
        event = DataEvent()
        self.redis.zadd('queue', {json.dumps((item, event.event_id)): -priority})
        self.increment_ip_count(client_ip, 'queued_ip_count')
        return event

    def get(self):
        """Block until an entry is available and return it.

        :return: the decoded ``[item, event_id]`` pair that ``put`` stored.
        """
        while True:
            data = self.redis.zpopmin('queue')
            if data:
                item = json.loads(data[0][0])
                client_ip = item[0][1]
                self.decrement_ip_count(client_ip, 'queued_ip_count')
                return item
            time.sleep(0.1)  # wait for something to be added to the queue

    def increment_ip_count(self, client_ip: str, redis_key):
        """Add one to ``client_ip``'s counter in the hash ``redis_key``."""
        self.redis.hincrby(redis_key, client_ip, 1)

    def decrement_ip_count(self, client_ip: str, redis_key):
        """Subtract one from ``client_ip``'s counter, deleting it at zero or below."""
        new_count = self.redis.hincrby(redis_key, client_ip, -1)
        if new_count <= 0:
            self.redis.hdel(redis_key, client_ip)

    def __len__(self):
        """Return the number of entries currently in the ``queue`` sorted set."""
        return self.redis.zcard('queue')

    def get_queued_ip_count(self, client_ip: str):
        """Return how many entries ``client_ip`` currently has queued.

        :return: the stored count as an ``int``; ``0`` when the IP has no
            entry in the ``queued_ip_count`` hash.
        """
        q = self.redis.hget('queued_ip_count', client_ip)
        if not q:
            return 0
        # Redis hands the value back as bytes/str; callers expect a number.
        return int(q)
2023-08-23 20:12:38 -06:00
class DataEvent:
    """Result channel between a queue producer and consumer over Redis pub/sub.

    Each event owns a channel named after its ``event_id`` on Redis db 14.
    ``set`` publishes a pickled payload to that channel and ``wait`` blocks
    until a payload arrives.

    NOTE(review): ``wait`` unpickles whatever is published on the channel, so
    this is only safe if everything able to publish to this Redis instance is
    trusted.
    """

    def __init__(self, event_id=None):
        # Reuse the caller's id when given; otherwise mint a fresh one.
        self.event_id = event_id or str(uuid4())
        self.redis = Redis(host='localhost', port=6379, db=14)
        self.pubsub = self.redis.pubsub()
        self.pubsub.subscribe(self.event_id)

    def set(self, data):
        """Publish ``data`` (pickled) on this event's channel."""
        payload = pickle.dumps(data)
        self.redis.publish(self.event_id, payload)

    def wait(self):
        """Block until a message arrives on the channel and return its payload."""
        for message in self.pubsub.listen():
            # Skip non-payload notifications such as the subscribe confirmation.
            if message['type'] != 'message':
                continue
            return pickle.loads(message['data'])
# Shared module-level queue instance. RedisPriorityQueue.__init__ runs when
# this module is imported.
priority_queue = RedisPriorityQueue()
2023-09-28 08:47:39 -06:00
def incr_active_workers():
    """Increment the ``active_gen_workers`` counter in Redis by one."""
    redis.incr('active_gen_workers')
def decr_active_workers():
    """Decrement the ``active_gen_workers`` counter, never leaving it below zero."""
    redis.decr('active_gen_workers')
    # Re-read the counter and reset it to 0 if the decrement took it negative.
    if redis.get('active_gen_workers', 0, dtype=int) < 0:
        redis.set('active_gen_workers', 0)