Merge cluster to master #3
|
@ -16,7 +16,7 @@ class OobaRequestHandler(RequestHandler):
|
|||
def handle_request(self, return_ok: bool = True):
|
||||
assert not self.used
|
||||
if self.offline:
|
||||
msg = f'{self.selected_model} is not a valid model choice.'
|
||||
msg = 'The model you requested is not a valid choice. Please retry your query.'
|
||||
print(msg)
|
||||
self.handle_error(msg)
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
from llm_server.routes.queue import priority_queue
|
||||
|
||||
try:
|
||||
import gevent.monkey
|
||||
|
||||
|
@ -26,6 +24,7 @@ from llm_server.routes.server_error import handle_server_error
|
|||
from llm_server.routes.v1 import bp
|
||||
from llm_server.sock import init_socketio
|
||||
|
||||
# TODO: return an `error: True`, error code, and error message rather than just a formatted message
|
||||
# TODO: what happens when all backends are offline? What about the "online" key in the stats page?
|
||||
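# TODO: decide whether to use redis SCAN instead of KEYS (KEYS walks the entire keyspace in one blocking call; SCAN iterates incrementally)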
|
||||
# TODO: implement blind RRD controlled via header and only used when there is a queue on the primary backend(s)
|
||||
|
|
Reference in New Issue