diff --git a/config/config.yml b/config/config.yml
index 3ebb664..67026da 100644
--- a/config/config.yml
+++ b/config/config.yml
@@ -4,6 +4,7 @@
 log_prompts: true
 mode: oobabooga
 auth_required: false
+concurrent_gens: 3
 backend_url: http://172.0.0.2:9104
diff --git a/llm_server/opts.py b/llm_server/opts.py
index 9536c13..8ec0f7f 100644
--- a/llm_server/opts.py
+++ b/llm_server/opts.py
@@ -1,7 +1,7 @@
 # Global settings that never change after startup
 running_model = 'none'
-concurrent_generates = 3
+concurrent_gens = 3
 mode = 'oobabooga'
 backend_url = None
 token_limit = 5555
diff --git a/llm_server/routes/stats.py b/llm_server/routes/stats.py
index 572bb30..78fd9ef 100644
--- a/llm_server/routes/stats.py
+++ b/llm_server/routes/stats.py
@@ -5,10 +5,10 @@
 from threading import Semaphore, Thread

 from llm_server import opts
 from llm_server.integer import ThreadSafeInteger
-from llm_server.opts import concurrent_generates
+from llm_server.opts import concurrent_gens

 proompters_1_min = 0
-concurrent_semaphore = Semaphore(concurrent_generates)
+concurrent_semaphore = Semaphore(concurrent_gens)
 proompts = ThreadSafeInteger(0)
 start_time = datetime.now()
@@ -23,7 +23,7 @@ class SemaphoreCheckerThread(Thread):
     def run(self):
         global proompters_1_min
         while True:
-            self.values.append(opts.concurrent_generates - concurrent_semaphore._value)
+            self.values.append(opts.concurrent_gens - concurrent_semaphore._value)
             proompters_1_min = sum(self.values) / len(self.values)
             time.sleep(1)
diff --git a/llm_server/routes/v1/proxy.py b/llm_server/routes/v1/proxy.py
index 92c9427..4357923 100644
--- a/llm_server/routes/v1/proxy.py
+++ b/llm_server/routes/v1/proxy.py
@@ -10,16 +10,25 @@ from .. import stats
 from ..cache import cache
 from ..helpers.http import cache_control
 from ..stats import proompters_1_min
+from ...llm.oobabooga.info import get_running_model


 @bp.route('/stats', methods=['GET'])
 @cache.cached(timeout=60, query_string=True)
 @cache_control(60)
 def get_stats():
+    model_list = get_running_model()
+    if isinstance(model_list, bool):
+        # get_running_model() will return False when the fetch fails
+        online = False
+    else:
+        online = True
+
     return jsonify({
-        'proompters_now': opts.concurrent_generates - concurrent_semaphore._value,
+        'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
         'proompters_1_min': proompters_1_min,
         'total_proompts': stats.proompts.value,
         'uptime': int((datetime.now() - stats.start_time).total_seconds()),
+        'online': online,
         'timestamp': int(time.time())
     }), 200
diff --git a/requirements.txt b/requirements.txt
index b309450..695028f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ pyyaml
 flask_caching
 requests
 tiktoken
-gunicorn
\ No newline at end of file
+gunicorn
+redis
\ No newline at end of file
diff --git a/server.py b/server.py
index 349f9a0..4927ed9 100644
--- a/server.py
+++ b/server.py
@@ -22,7 +22,7 @@ if config_path_environ:
 else:
     config_path = Path(script_path, 'config', 'config.yml')

-default_vars = {'mode': 'oobabooga', 'log_prompts': False, 'database_path': './proxy-server.db', 'auth_required': False}
+default_vars = {'mode': 'oobabooga', 'log_prompts': False, 'database_path': './proxy-server.db', 'auth_required': False, 'concurrent_gens': 3}
 required_vars = []
 config_loader = ConfigLoader(config_path, default_vars, required_vars)
 success, config, msg = config_loader.load_config()
@@ -44,6 +44,7 @@ if config['mode'] not in ['oobabooga', 'hf-textgen']:
 opts.mode = config['mode']
 opts.auth_required = config['auth_required']
 opts.log_prompts = config['log_prompts']
+opts.concurrent_gens = config['concurrent_gens']
 opts.running_model = get_running_model()