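"""Base class for LLM backends; concrete backends implement the hooks below."""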
from typing import Tuple

import flask

from llm_server import opts
from llm_server.llm.backend import tokenizer


class LLMBackend:
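    # Per-backend default generation parameters, populated by subclasses
    # (see get_parameters, which lets a backend apply its own defaults).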
    _default_params: dict

    def handle_response(self, success, request: flask.Request, response_json_body: dict, response_status_code: int, client_ip, token, prompt, elapsed_time, parameters, headers):
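        """Build the client-facing response from the backend's reply.

        Implemented by subclasses. The arguments carry the upstream JSON body
        and status code plus per-request metadata (client IP, token, prompt,
        elapsed time, parameters, and headers).
        """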
        raise NotImplementedError

    def validate_params(self, params_dict: dict) -> Tuple[bool, str | None]:
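        """Check params_dict against this backend's accepted parameters; returns (ok, error message)."""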
        raise NotImplementedError

    # def get_model_info(self) -> Tuple[dict | bool, Exception | None]:
    #     raise NotImplementedError

    def get_parameters(self, parameters) -> Tuple[dict | None, str | None]:
        """
        Validate the incoming parameters and return the ones this backend will use.
        Lets subclasses set backend-specific defaults.

        :param parameters: generation parameters supplied with the request.
        :return: a (parameters, error) tuple; the dict is None and the error
            string is set when validation fails.
        """
        raise NotImplementedError

    def validate_request(self, parameters: dict) -> Tuple[bool, str | None]:
        raise NotImplementedError

    def validate_prompt(self, prompt: str) -> Tuple[bool, str | None]:
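        """Check that the prompt fits within the configured context window."""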
        prompt_len = len(tokenizer(prompt))
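        # The check leaves 10 tokens of headroom below the configured context
        # size (presumably a safety margin for special/control tokens).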
        if prompt_len > opts.context_size - 10:
            return False, f'Token indices sequence length is longer than the specified maximum sequence length for this model ({prompt_len} > {opts.context_size - 10}). Please shorten your prompt.'
        return True, None
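# Illustrative sketch (not part of the original module): a minimal concrete
# backend could subclass LLMBackend like this. The class name and default
# values here are assumptions for demonstration only.
#
# class ExampleBackend(LLMBackend):
#     _default_params = {'temperature': 0.7, 'max_new_tokens': 200}
#
#     def get_parameters(self, parameters) -> Tuple[dict | None, str | None]:
#         if not isinstance(parameters, dict):
#             return None, 'parameters must be a dict'
#         # Request values override the backend defaults.
#         return {**self._default_params, **parameters}, None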