2023-09-12 01:04:11 -06:00
from typing import Tuple , Union
2023-08-30 18:53:26 -06:00
2023-09-13 11:22:33 -06:00
import flask
2023-09-14 18:31:13 -06:00
from llm_server import opts
2023-09-20 20:30:31 -06:00
from llm_server . llm . backend import tokenizer
2023-09-14 18:31:13 -06:00
2023-08-30 18:53:26 -06:00
class LLMBackend :
2023-09-14 17:38:20 -06:00
_default_params : dict
2023-09-12 16:40:09 -06:00
2023-09-14 14:05:50 -06:00
def handle_response ( self , success , request : flask . Request , response_json_body : dict , response_status_code : int , client_ip , token , prompt , elapsed_time , parameters , headers ) :
2023-08-30 18:53:26 -06:00
raise NotImplementedError
2023-09-11 20:47:19 -06:00
def validate_params ( self , params_dict : dict ) - > Tuple [ bool , str | None ] :
2023-08-30 18:53:26 -06:00
raise NotImplementedError
2023-09-11 20:47:19 -06:00
# def get_model_info(self) -> Tuple[dict | bool, Exception | None]:
# raise NotImplementedError
2023-09-12 01:04:11 -06:00
2023-09-12 10:30:45 -06:00
def get_parameters ( self , parameters ) - > Tuple [ dict | None , str | None ] :
2023-09-12 01:04:11 -06:00
"""
Validate and return the parameters for this backend .
Lets you set defaults for specific backends .
: param parameters :
: return :
"""
raise NotImplementedError
2023-09-12 16:40:09 -06:00
2023-09-14 14:05:50 -06:00
def validate_request ( self , parameters : dict ) - > Tuple [ bool , Union [ str , None ] ] :
2023-09-12 16:40:09 -06:00
raise NotImplementedError
2023-09-14 18:31:13 -06:00
2023-09-20 20:30:31 -06:00
def validate_prompt ( self , prompt : str ) - > Tuple [ bool , Union [ str , None ] ] :
prompt_len = len ( tokenizer ( prompt ) )
if prompt_len > opts . context_size - 10 :
2023-09-14 18:31:13 -06:00
return False , f ' Token indices sequence length is longer than the specified maximum sequence length for this model ( { prompt_len } > { opts . context_size } ). Please lower your context size '
return True , None