diff --git a/README.md b/README.md
index bd1b4e5..814ccd6 100644
--- a/README.md
+++ b/README.md
@@ -42,8 +42,7 @@ To set up token auth, add rows to the `token_auth` table in the SQLite database.
 
 ### Use
 
-**DO NOT** lose your database. It's used for calculating the estimated wait time based on average TPS and response tokens and if you lose those stats your numbers will be inaccurate until the database fills back up again. If you change graphics
-cards, you should probably clear the `generation_time` time column in the `prompts` table.
+**DO NOT** lose your database. It's used for calculating the estimated wait time based on average TPS and response tokens and if you lose those stats your numbers will be inaccurate until the database fills back up again. If you change GPUs, you should probably clear the `generation_time` time column in the `prompts` table.
 
 ### To Do
 
diff --git a/llm_server/helpers.py b/llm_server/helpers.py
index ed0847c..c7f42a0 100644
--- a/llm_server/helpers.py
+++ b/llm_server/helpers.py
@@ -1,3 +1,5 @@
+import json
+from collections import OrderedDict
 from pathlib import Path
 
 
@@ -17,3 +19,24 @@ def safe_list_get(l, idx, default):
         return l[idx]
     except IndexError:
         return default
+
+
+def deep_sort(obj):
+    """
+    https://stackoverflow.com/a/59218649
+    :param obj:
+    :return:
+    """
+    if isinstance(obj, dict):
+        obj = OrderedDict(sorted(obj.items()))
+        for k, v in obj.items():
+            if isinstance(v, dict) or isinstance(v, list):
+                obj[k] = deep_sort(v)
+
+    if isinstance(obj, list):
+        for i, v in enumerate(obj):
+            if isinstance(v, dict) or isinstance(v, list):
+                obj[i] = deep_sort(v)
+        obj = sorted(obj, key=lambda x: json.dumps(x))
+
+    return obj
diff --git a/llm_server/routes/v1/generate_stats.py b/llm_server/routes/v1/generate_stats.py
index 15361a0..33d8da0 100644
--- a/llm_server/routes/v1/generate_stats.py
+++ b/llm_server/routes/v1/generate_stats.py
@@ -2,6 +2,7 @@ import time
 from datetime import datetime
 
 from llm_server import opts
+from llm_server.helpers import deep_sort
 from llm_server.llm.info import get_running_model
 from llm_server.routes.cache import redis
 from llm_server.routes.queue import priority_queue
@@ -35,8 +36,7 @@ def generate_stats():
     else:
         raise Exception
 
-    # TODO: https://stackoverflow.com/questions/22721579/sorting-a-nested-ordereddict-by-key-recursively
-    return {
+    output = {
         'stats': {
             'proompts_in_queue': proompters_in_queue,
             'proompters_1_min': SemaphoreCheckerThread.proompters_1_min,
@@ -46,18 +46,21 @@
             'average_tps': average_tps,
         },
         'online': online,
-        'mode': opts.mode,
-        'model': model_list,
         'endpoints': {
             'blocking': opts.full_client_api,
         },
         'estimated_wait_sec': estimated_wait_sec,
         'timestamp': int(time.time()),
-        'openaiKeys': '∞',
-        'anthropicKeys': '∞',
         'config': {
             'gatekeeper': 'none' if opts.auth_required is False else 'token',
             'context_size': opts.context_size,
             'queue_size': opts.concurrent_gens,
-        }
+            'model': model_list,
+            'mode': opts.mode,
+        },
+        'keys': {
+            'openaiKeys': '∞',
+            'anthropicKeys': '∞',
+        },
     }
+    return deep_sort(output)