handle model offline
This commit is contained in:
parent
754a4cbdf3
commit
5e90fa54d4
|
@ -15,6 +15,10 @@ class OobaRequestHandler(RequestHandler):
|
||||||
|
|
||||||
def handle_request(self, return_ok: bool = True):
|
def handle_request(self, return_ok: bool = True):
|
||||||
assert not self.used
|
assert not self.used
|
||||||
|
if self.offline:
|
||||||
|
msg = f'{self.selected_model} is not a valid model choice.'
|
||||||
|
print(msg)
|
||||||
|
return jsonify({'results': [{'text': format_sillytavern_err(msg)}]}), 200
|
||||||
|
|
||||||
request_valid, invalid_response = self.validate_request()
|
request_valid, invalid_response = self.validate_request()
|
||||||
if not request_valid:
|
if not request_valid:
|
||||||
|
|
|
@ -13,6 +13,7 @@ from llm_server.helpers import auto_set_base_client_api
|
||||||
from llm_server.llm.oobabooga.ooba_backend import OobaboogaBackend
|
from llm_server.llm.oobabooga.ooba_backend import OobaboogaBackend
|
||||||
from llm_server.llm.vllm.vllm_backend import VLLMBackend
|
from llm_server.llm.vllm.vllm_backend import VLLMBackend
|
||||||
from llm_server.routes.auth import parse_token
|
from llm_server.routes.auth import parse_token
|
||||||
|
from llm_server.routes.helpers.client import format_sillytavern_err
|
||||||
from llm_server.routes.helpers.http import require_api_key, validate_json
|
from llm_server.routes.helpers.http import require_api_key, validate_json
|
||||||
from llm_server.routes.queue import priority_queue
|
from llm_server.routes.queue import priority_queue
|
||||||
|
|
||||||
|
@ -42,6 +43,11 @@ class RequestHandler:
|
||||||
if not self.cluster_backend_info.get('model'):
|
if not self.cluster_backend_info.get('model'):
|
||||||
print('keyerror: mode -', selected_model, self.backend_url, self.cluster_backend_info)
|
print('keyerror: mode -', selected_model, self.backend_url, self.cluster_backend_info)
|
||||||
|
|
||||||
|
if not self.cluster_backend_info.get('mode') or not self.cluster_backend_info.get('model'):
|
||||||
|
self.offline = True
|
||||||
|
else:
|
||||||
|
self.offline = False
|
||||||
|
|
||||||
self.selected_model = self.cluster_backend_info['model']
|
self.selected_model = self.cluster_backend_info['model']
|
||||||
self.backend = get_backend_handler(self.cluster_backend_info['mode'], self.backend_url)
|
self.backend = get_backend_handler(self.cluster_backend_info['mode'], self.backend_url)
|
||||||
self.parameters = None
|
self.parameters = None
|
||||||
|
@ -215,8 +221,11 @@ class RequestHandler:
|
||||||
|
|
||||||
def handle_request(self) -> Tuple[flask.Response, int]:
|
def handle_request(self) -> Tuple[flask.Response, int]:
|
||||||
# Must include this in your child.
|
# Must include this in your child.
|
||||||
# if self.used:
|
# assert not self.used
|
||||||
# raise Exception('Can only use a RequestHandler object once.')
|
# if self.offline:
|
||||||
|
# msg = f'{self.selected_model} is not a valid model choice.'
|
||||||
|
# print(msg)
|
||||||
|
# return format_sillytavern_err(msg)
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def handle_ratelimited(self, do_log: bool = True) -> Tuple[flask.Response, int]:
|
def handle_ratelimited(self, do_log: bool = True) -> Tuple[flask.Response, int]:
|
||||||
|
|
Reference in New Issue