Merge cluster to master #3

Merged
cyberes merged 163 commits from cluster into master 2023-10-27 19:19:22 -06:00
3 changed files with 11 additions and 12 deletions
Showing only changes of commit 67f5df9bb9 - Show all commits

View File

@@ -61,7 +61,10 @@ def get_model_choices(regen: bool = False):
model_choices = dict(sorted(model_choices.items(), key=lambda item: item[0].upper()))
default_backend_url = get_a_cluster_backend()
default_model = cluster_config.get_backend(default_backend_url)['model']
default_backend_info = cluster_config.get_backend(default_backend_url)
if not default_backend_info.get('model'):
return None, None
default_model = default_backend_info['model']
redis.setp('model_choices', (model_choices, default_model))
return model_choices, default_model

View File

@@ -2,7 +2,6 @@ import time
from datetime import datetime
from llm_server import opts
from llm_server.cluster.backend import get_a_cluster_backend
from llm_server.cluster.cluster_config import cluster_config
from llm_server.cluster.model_choices import get_model_choices
from llm_server.custom_redis import redis
@@ -17,17 +16,17 @@ def generate_stats(regen: bool = False):
if c:
return c
default_backend_url = get_a_cluster_backend()
default_backend_info = cluster_config.get_backend(default_backend_url)
if not default_backend_info.get('mode'):
return
model_choices, default_model = get_model_choices(regen=True)
if not model_choices or not default_model:
return 'Please wait for Redis to be populated...'
base_client_api = redis.get('base_client_api', dtype=str)
proompters_5_min = len(redis.zrangebyscore('recent_prompters', time.time() - 5 * 60, '+inf'))
output = {
'default': {
'model': default_backend_info['model'],
'backend': default_backend_url,
'models': {
'choices': model_choices,
'default': default_model,
},
'stats': {
'proompters': {
@@ -76,8 +75,6 @@
else:
output['backend_info'] = {}
output['default_model'] = get_model_choices(regen=True)[1]
result = deep_sort(output)
# It may take a bit to get the base client API, so don't cache until then.

View File

@@ -24,7 +24,6 @@ from llm_server.routes.server_error import handle_server_error
from llm_server.routes.v1 import bp
from llm_server.sock import init_socketio
# TODO: redis SCAN vs KEYS??
# TODO: implement blind RRD controlled via header and only used when there is a queue on the primary backend(s)
# TODO: is frequency penalty the same as ooba repetition penalty???