Merge cluster to master #3
|
@ -61,7 +61,10 @@ def get_model_choices(regen: bool = False):
|
|||
model_choices = dict(sorted(model_choices.items(), key=lambda item: item[0].upper()))
|
||||
|
||||
default_backend_url = get_a_cluster_backend()
|
||||
default_model = cluster_config.get_backend(default_backend_url)['model']
|
||||
default_backend_info = cluster_config.get_backend(default_backend_url)
|
||||
if not default_backend_info.get('model'):
|
||||
return None, None
|
||||
default_model = default_backend_info['model']
|
||||
|
||||
redis.setp('model_choices', (model_choices, default_model))
|
||||
return model_choices, default_model
|
||||
|
|
|
@ -2,7 +2,6 @@ import time
|
|||
from datetime import datetime
|
||||
|
||||
from llm_server import opts
|
||||
from llm_server.cluster.backend import get_a_cluster_backend
|
||||
from llm_server.cluster.cluster_config import cluster_config
|
||||
from llm_server.cluster.model_choices import get_model_choices
|
||||
from llm_server.custom_redis import redis
|
||||
|
@ -17,17 +16,17 @@ def generate_stats(regen: bool = False):
|
|||
if c:
|
||||
return c
|
||||
|
||||
default_backend_url = get_a_cluster_backend()
|
||||
default_backend_info = cluster_config.get_backend(default_backend_url)
|
||||
if not default_backend_info.get('mode'):
|
||||
return
|
||||
model_choices, default_model = get_model_choices(regen=True)
|
||||
if not model_choices or not default_model:
|
||||
return 'Please wait for Redis to be populated...'
|
||||
|
||||
base_client_api = redis.get('base_client_api', dtype=str)
|
||||
proompters_5_min = len(redis.zrangebyscore('recent_prompters', time.time() - 5 * 60, '+inf'))
|
||||
|
||||
output = {
|
||||
'default': {
|
||||
'model': default_backend_info['model'],
|
||||
'backend': default_backend_url,
|
||||
'models': {
|
||||
'choices': model_choices,
|
||||
'default': default_model,
|
||||
},
|
||||
'stats': {
|
||||
'proompters': {
|
||||
|
@ -76,8 +75,6 @@ def generate_stats(regen: bool = False):
|
|||
else:
|
||||
output['backend_info'] = {}
|
||||
|
||||
output['default_model'] = get_model_choices(regen=True)[1]
|
||||
|
||||
result = deep_sort(output)
|
||||
|
||||
# It may take a bit to get the base client API, so don't cache until then.
|
||||
|
|
|
@ -24,7 +24,6 @@ from llm_server.routes.server_error import handle_server_error
|
|||
from llm_server.routes.v1 import bp
|
||||
from llm_server.sock import init_socketio
|
||||
|
||||
|
||||
# TODO: redis SCAN vs KEYS??
|
||||
# TODO: implement blind RRD controlled via header and only used when there is a queue on the primary backend(s)
|
||||
# TODO: is frequency penalty the same as ooba repetition penalty???
|
||||
|
|
Reference in New Issue