From 28c250385df691e8fc610e864434dd71c047be21 Mon Sep 17 00:00:00 2001 From: Cyberes Date: Fri, 27 Oct 2023 19:00:49 -0600 Subject: [PATCH] add todo --- server.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server.py b/server.py index 9603350..aa8ef1a 100644 --- a/server.py +++ b/server.py @@ -34,12 +34,11 @@ from llm_server.sock import init_wssocket # TODO: return an `error: True`, error code, and error message rather than just a formatted message # TODO: what happens when all backends are offline? What about the "online" key in the stats page? # TODO: redis SCAN vs KEYS?? -# TODO: implement blind RRD controlled via header and only used when there is a queue on the primary backend(s) # TODO: is frequency penalty the same as ooba repetition penalty??? # TODO: make sure openai_moderation_enabled works on websockets, completions, and chat completions -# TODO: if a backend is at its limit of concurrent requests, choose a different one # Lower priority +# TODO: if a backend is at its limit of concurrent requests, choose a different one # TODO: make error messages consitient # TODO: support logit_bias on OpenAI and Ooba endpoints. # TODO: add a way to cancel VLLM gens. Maybe use websockets?