don't count SYSTEM tokens for recent prompters, fix sql exclude for SYSTEM tokens

2023-09-25 13:00:39 -06:00 · 2023-09-25 13:00:39 -06:00 · 52e6965b5e
parent 3eaabc8c35
commit 52e6965b5e
4 changed files with 14 additions and 3 deletions
--- a/llm_server/routes/queue.py
+++ b/llm_server/routes/queue.py
@@ -104,6 +104,7 @@ def worker():
        increment_ip_count(client_ip, 'processing_ips')
+        # TODO: only increment if not valid SYSTEM__ token
        redis.incr('active_gen_workers')
        start_time = time.time()
@@ -118,6 +119,8 @@ def worker():
        event.set((success, response, error_msg))
        decrement_ip_count(client_ip, 'processing_ips')
+        # TODO: only decrement if not valid SYSTEM__ token
        redis.decr('active_gen_workers')
--- a/llm_server/routes/request_handler.py
+++ b/llm_server/routes/request_handler.py
@@ -38,7 +38,7 @@ class RequestHandler:
        self.parameters = None
        self.used = False
        recent_prompters = redis.get_dict('recent_prompters')
-        recent_prompters[self.client_ip] = time.time()
+        recent_prompters[self.client_ip] = (time.time(), self.token)
        redis.set_dict('recent_prompters', recent_prompters)
    def get_auth_token(self):
--- a/llm_server/routes/stats.py
+++ b/llm_server/routes/stats.py
@@ -93,7 +93,15 @@ class SemaphoreCheckerThread(Thread):
        while True:
            current_time = time.time()
            recent_prompters = redis.get_dict('recent_prompters')
-            new_recent_prompters = {ip: timestamp for ip, timestamp in recent_prompters.items() if current_time - timestamp <= 300}
+            new_recent_prompters = {}
+            for ip, (timestamp, token) in recent_prompters.items():
+                # TODO: validate token
+                if token and token.startswith('SYSTEM__'):
+                    continue
+                if current_time - timestamp <= 300:
+                    new_recent_prompters[ip] = timestamp, token
            redis.set_dict('recent_prompters', new_recent_prompters)
            redis.set('proompters_5_min', len(new_recent_prompters))
            time.sleep(1)
--- a/llm_server/routes/v1/generate_stats.py
+++ b/llm_server/routes/v1/generate_stats.py
@@ -35,7 +35,7 @@ def calculate_wait_time(gen_time_calc, proompters_in_queue, concurrent_gens, act
 # TODO: have routes/__init__.py point to the latest API version generate_stats()
-@cache.memoize(timeout=20)
+@cache.memoize(timeout=10)
 def generate_stats():
    model_name, error = get_running_model()  # will return False when the fetch fails
    if isinstance(model_name, bool):