from typing import Tuple, Union

import flask

from llm_server import opts
from llm_server.llm import get_token_count


class LLMBackend:
    """
    Base interface for an LLM backend.

    The hooks that raise ``NotImplementedError`` must be overridden by concrete
    backends. ``validate_request`` and ``validate_prompt`` ship with default
    implementations.
    """

    # Declared here but never assigned in this class; presumably populated by
    # subclasses -- TODO confirm.
    _default_params: dict

    def handle_response(
            self,
            success,
            request: flask.Request,
            response_json_body: dict,
            response_status_code: int,
            client_ip,
            token,
            prompt,
            elapsed_time,
            parameters,
            headers,
    ):
        """
        Process the result of a backend call. Must be overridden.

        :param success:
        :param request:
        :param response_json_body:
        :param response_status_code:
        :param client_ip:
        :param token:
        :param prompt:
        :param elapsed_time:
        :param parameters:
        :param headers:
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def validate_params(self, params_dict: dict) -> Tuple[bool, str | None]:
        """
        Check the given parameters for this backend. Must be overridden.

        :param params_dict:
        :return: per the annotation, a ``(bool, str or None)`` pair --
                 presumably (is valid, error message); confirm against the
                 concrete backends.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    # def get_model_info(self) -> Tuple[dict | bool, Exception | None]:
    #     raise NotImplementedError

    def get_parameters(self, parameters) -> Tuple[dict | None, str | None]:
        """
        Validate the parameters and hand them back for this backend.
        This is the place where a specific backend can apply its own defaults.

        :param parameters:
        :return:
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def validate_request(self, parameters: dict, prompt: str, request: flask.Request) -> Tuple[bool, Union[str, None]]:
        """
        Hook for backend-specific checks that concern neither the prompt nor
        the parameters. The default implementation performs no extra checks
        and accepts every request.

        :param parameters:
        :param prompt:
        :param request:
        :return: always ``(True, None)`` in this base class.
        """
        return True, None

    def validate_prompt(self, prompt: str) -> Tuple[bool, Union[str, None]]:
        """
        Reject prompts whose token count is too close to the context size.

        :param prompt: the prompt text, measured with ``get_token_count``.
        :return: ``(True, None)`` when the prompt is accepted, otherwise
                 ``(False, message)`` with a message describing the overflow.
        """
        token_count = get_token_count(prompt)
        context_limit = opts.context_size

        # Ten tokens are held back from the configured context size.
        # NOTE(review): the message below reports the full context size rather
        # than the reduced threshold -- confirm that this is intended.
        over_limit = token_count > context_limit - 10
        if not over_limit:
            return True, None

        return False, (
            f'Token indices sequence length is longer than the specified maximum sequence length for this model '
            f'({token_count} > {context_limit}). Please lower your context size'
        )