From 52e6965b5ee6036b4085876ff512d267816a00bf Mon Sep 17 00:00:00 2001 From: Cyberes Date: Mon, 25 Sep 2023 13:00:39 -0600 Subject: [PATCH] don't count SYSTEM tokens for recent prompters, fix sql exclude for SYSTEM tokens --- llm_server/routes/queue.py | 3 +++ llm_server/routes/request_handler.py | 2 +- llm_server/routes/stats.py | 10 +++++++++- llm_server/routes/v1/generate_stats.py | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/llm_server/routes/queue.py b/llm_server/routes/queue.py index d5d16a1..102c40a 100644 --- a/llm_server/routes/queue.py +++ b/llm_server/routes/queue.py @@ -104,6 +104,7 @@ def worker(): increment_ip_count(client_ip, 'processing_ips') + # TODO: only increment if not valid SYSTEM__ token redis.incr('active_gen_workers') start_time = time.time() @@ -118,6 +119,8 @@ def worker(): event.set((success, response, error_msg)) decrement_ip_count(client_ip, 'processing_ips') + + # TODO: only decrement if not valid SYSTEM__ token redis.decr('active_gen_workers') diff --git a/llm_server/routes/request_handler.py b/llm_server/routes/request_handler.py index 996ac8d..0c8f4fe 100644 --- a/llm_server/routes/request_handler.py +++ b/llm_server/routes/request_handler.py @@ -38,7 +38,7 @@ class RequestHandler: self.parameters = None self.used = False recent_prompters = redis.get_dict('recent_prompters') - recent_prompters[self.client_ip] = time.time() + recent_prompters[self.client_ip] = (time.time(), self.token) redis.set_dict('recent_prompters', recent_prompters) def get_auth_token(self): diff --git a/llm_server/routes/stats.py b/llm_server/routes/stats.py index d62b93d..802bad5 100644 --- a/llm_server/routes/stats.py +++ b/llm_server/routes/stats.py @@ -93,7 +93,15 @@ class SemaphoreCheckerThread(Thread): while True: current_time = time.time() recent_prompters = redis.get_dict('recent_prompters') - new_recent_prompters = {ip: timestamp for ip, timestamp in recent_prompters.items() if current_time - timestamp <= 300} + new_recent_prompters = {} + + for ip, (timestamp, token) in recent_prompters.items(): + # TODO: validate token + if token and token.startswith('SYSTEM__'): + continue + if current_time - timestamp <= 300: + new_recent_prompters[ip] = timestamp, token + redis.set_dict('recent_prompters', new_recent_prompters) redis.set('proompters_5_min', len(new_recent_prompters)) time.sleep(1) diff --git a/llm_server/routes/v1/generate_stats.py b/llm_server/routes/v1/generate_stats.py index 031e910..75cd373 100644 --- a/llm_server/routes/v1/generate_stats.py +++ b/llm_server/routes/v1/generate_stats.py @@ -35,7 +35,7 @@ def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, act # TODO: have routes/__init__.py point to the latest API version generate_stats() -@cache.memoize(timeout=20) +@cache.memoize(timeout=10) def generate_stats(): model_name, error = get_running_model() # will return False when the fetch fails if isinstance(model_name, bool):