concurrent gens setting, online status

parent d250ba6484
commit ad9a91f1b5
config.yml:

@@ -4,6 +4,7 @@ log_prompts: true
 
 mode: oobabooga
 auth_required: false
+concurrent_gens: 3
 
 backend_url: http://172.0.0.2:9104
 
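For reference, the relevant section of config.yml now reads as follows (only keys visible in this diff are shown; the comment reflects how the value is used further down):

log_prompts: true
mode: oobabooga
auth_required: false
concurrent_gens: 3  # maximum number of generation requests served at once
backend_url: http://172.0.0.2:9104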
llm_server/opts.py:

@@ -1,7 +1,7 @@
 # Global settings that never change after startup
 
 running_model = 'none'
-concurrent_generates = 3
+concurrent_gens = 3
 mode = 'oobabooga'
 backend_url = None
 token_limit = 5555
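Two access patterns for these settings appear in this commit: `from llm_server.opts import concurrent_gens` copies the value at import time, while `opts.concurrent_gens` reads the live attribute. A minimal sketch of the difference:

# sketch: a from-import binds the value once; attribute access sees later updates
from llm_server import opts
from llm_server.opts import concurrent_gens  # binds 3 at import time

opts.concurrent_gens = 5         # e.g. after the config file is read
print(concurrent_gens)           # 3: the from-import kept the old value
print(opts.concurrent_gens)      # 5: attribute lookup sees the update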
The stats module:

@@ -5,10 +5,10 @@ from threading import Semaphore, Thread
 
 from llm_server import opts
 from llm_server.integer import ThreadSafeInteger
-from llm_server.opts import concurrent_generates
+from llm_server.opts import concurrent_gens
 
 proompters_1_min = 0
-concurrent_semaphore = Semaphore(concurrent_generates)
+concurrent_semaphore = Semaphore(concurrent_gens)
 proompts = ThreadSafeInteger(0)
 start_time = datetime.now()
 
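The semaphore presumably wraps each generation request so that at most concurrent_gens of them run at once; a minimal sketch of that pattern (handle_generate and forward_to_backend are hypothetical names, not part of this diff):

# sketch: gating generations on the shared semaphore (names hypothetical)
from llm_server.stats import concurrent_semaphore  # module path assumed

def handle_generate(prompt):
    with concurrent_semaphore:  # blocks once concurrent_gens requests are in flight
        return forward_to_backend(prompt)  # hypothetical call to backend_url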
Further down in the same module:

@@ -23,7 +23,7 @@ class SemaphoreCheckerThread(Thread):
     def run(self):
         global proompters_1_min
         while True:
-            self.values.append(opts.concurrent_generates - concurrent_semaphore._value)
+            self.values.append(opts.concurrent_gens - concurrent_semaphore._value)
             proompters_1_min = sum(self.values) / len(self.values)
             time.sleep(1)
 
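A Semaphore's private _value counts free slots, so opts.concurrent_gens - concurrent_semaphore._value is the number of generations currently in flight; sampled once a second and averaged over self.values, that gives the one-minute figure. self.values is presumably a bounded window along these lines (the maxlen is an assumption, not shown in this diff):

# sketch: a one-minute sample window for the rolling average
from collections import deque

values = deque(maxlen=60)   # assumed: keep only the last 60 one-second samples
values.append(3 - 1)        # e.g. capacity 3, one slot free -> 2 in flight
average = sum(values) / len(values)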
The /stats route handler:

@@ -10,16 +10,25 @@ from .. import stats
 from ..cache import cache
 from ..helpers.http import cache_control
 from ..stats import proompters_1_min
+from ...llm.oobabooga.info import get_running_model
 
 
 @bp.route('/stats', methods=['GET'])
 @cache.cached(timeout=60, query_string=True)
 @cache_control(60)
 def get_stats():
+    model_list = get_running_model()
+    if isinstance(model_list, bool):
+        # get_running_model() will return False when the fetch fails
+        online = False
+    else:
+        online = True
+
     return jsonify({
-        'proompters_now': opts.concurrent_generates - concurrent_semaphore._value,
+        'proompters_now': opts.concurrent_gens - concurrent_semaphore._value,
         'proompters_1_min': proompters_1_min,
         'total_proompts': stats.proompts.value,
         'uptime': int((datetime.now() - stats.start_time).total_seconds()),
+        'online': online,
         'timestamp': int(time.time())
     }), 200
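With this in place, a request to the stats route returns something like the following (blueprint URL prefix omitted, values illustrative):

GET /stats

{
  "proompters_now": 1,
  "proompters_1_min": 0.65,
  "total_proompts": 42,
  "uptime": 3600,
  "online": true,
  "timestamp": 1693000000
}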
requirements.txt:

@@ -5,3 +5,4 @@ flask_caching
 requests
 tiktoken
 gunicorn
+redis
The server startup script, where config.yml is loaded:

@@ -22,7 +22,7 @@ if config_path_environ:
 else:
     config_path = Path(script_path, 'config', 'config.yml')
 
-default_vars = {'mode': 'oobabooga', 'log_prompts': False, 'database_path': './proxy-server.db', 'auth_required': False}
+default_vars = {'mode': 'oobabooga', 'log_prompts': False, 'database_path': './proxy-server.db', 'auth_required': False, 'concurrent_gens': 3}
 required_vars = []
 config_loader = ConfigLoader(config_path, default_vars, required_vars)
 success, config, msg = config_loader.load_config()
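Assuming ConfigLoader follows the usual overlay pattern, keys present in config.yml win and default_vars fills in anything missing, roughly:

# sketch of the assumed default/file merge inside ConfigLoader
import yaml

with open(config_path) as f:
    loaded = yaml.safe_load(f) or {}
config = {**default_vars, **loaded}  # file values win; absent keys fall back to defaults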
Later in the same startup script:

@@ -44,6 +44,7 @@ if config['mode'] not in ['oobabooga', 'hf-textgen']:
 opts.mode = config['mode']
 opts.auth_required = config['auth_required']
 opts.log_prompts = config['log_prompts']
+opts.concurrent_gens = config['concurrent_gens']
 
 opts.running_model = get_running_model()
 
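Worth noting: the Semaphore in the stats module is sized from concurrent_gens at import time, so whether this assignment actually resizes anything depends on import order; with both the default and the shipped config at 3 the difference is invisible, but a config of, say, concurrent_gens: 5 would expose it:

# sketch: the config value must reach opts before the stats module is imported
opts.concurrent_gens = config['concurrent_gens']  # e.g. 5
from llm_server import stats  # path assumed; Semaphore(concurrent_gens) runs at this import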