diff --git a/llm_server/routes/stats.py b/llm_server/routes/stats.py
index 41f0fde..15d0104 100644
--- a/llm_server/routes/stats.py
+++ b/llm_server/routes/stats.py
@@ -1,10 +1,43 @@
+import collections
+import time
 from datetime import datetime
-from threading import Semaphore
+from threading import Semaphore, Thread
 
 from llm_server.integer import ThreadSafeInteger
 from llm_server.opts import concurrent_generates
 
+# Rolling average of acquired semaphore slots, rebound about once per second
+# by SemaphoreCheckerThread. Read it as ``stats.proompters_1_min``; a
+# ``from ... import proompters_1_min`` would freeze the initial value.
+proompters_1_min = 0
 concurrent_semaphore = Semaphore(concurrent_generates)
 
 proompts = ThreadSafeInteger(0)
 start_time = datetime.now()
+
+
+class SemaphoreCheckerThread(Thread):
+    """Daemon thread that samples semaphore usage about once per second.
+
+    Publishes the mean of the most recent samples (at most 60) in the
+    module-level ``proompters_1_min``.
+    """
+
+    def __init__(self, semaphore):
+        Thread.__init__(self)
+        self.semaphore = semaphore
+        self.values = collections.deque(maxlen=60)
+        self.daemon = True
+
+    def run(self):
+        global proompters_1_min
+        while True:
+            # Record the number of slots in use, not the Semaphore object
+            # itself: sum() over Semaphore instances raises TypeError.
+            self.values.append(concurrent_generates - self.semaphore._value)
+            proompters_1_min = sum(self.values) / len(self.values)
+            time.sleep(1)
+
+
+thread = SemaphoreCheckerThread(concurrent_semaphore)
+thread.start()
diff --git a/llm_server/routes/v1/proxy.py b/llm_server/routes/v1/proxy.py
index 0e894ab..92c9427 100644
--- a/llm_server/routes/v1/proxy.py
+++ b/llm_server/routes/v1/proxy.py
@@ -4,11 +4,11 @@ from datetime import datetime
 from flask import jsonify
 
 from llm_server import opts
+from llm_server.routes.v1.generate import concurrent_semaphore
 from . import bp
 from .. import stats
-from llm_server.routes.v1.generate import concurrent_semaphore
 from ..cache import cache
 from ..helpers.http import cache_control
 
 
 @bp.route('/stats', methods=['GET'])
@@ -17,6 +17,8 @@ from ..helpers.http import cache_control
 def get_stats():
     return jsonify({
         'proompters_now': opts.concurrent_generates - concurrent_semaphore._value,
+        # Resolved on the module at request time; the sampler thread rebinds it.
+        'proompters_1_min': stats.proompters_1_min,
         'total_proompts': stats.proompts.value,
         'uptime': int((datetime.now() - stats.start_time).total_seconds()),
         'timestamp': int(time.time())