didnt test anything
parent 84369d6c78
commit 05a45e6ac6

@@ -74,7 +74,7 @@ def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backe
     timestamp = int(time.time())
     conn = sqlite3.connect(opts.database_path)
     c = conn.cursor()
-    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
               (ip, token, opts.running_model, opts.mode, opts.backend_url, request_url, gen_time, prompt, prompt_tokens, response, response_tokens, backend_response_code, json.dumps(parameters), json.dumps(headers), timestamp))
     conn.commit()
     conn.close()
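
Note: the change above brings the number of `?` placeholders up to the fifteen values in the parameter tuple; sqlite3 raises a ProgrammingError whenever the two counts differ. A minimal sketch of that rule, using a hypothetical two-column table rather than the real prompts schema:

    import sqlite3

    # Hypothetical table; the real prompts table has fifteen columns.
    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE demo (a TEXT, b TEXT)")

    # Placeholder count must equal the length of the supplied tuple.
    conn.execute("INSERT INTO demo VALUES (?, ?)", ("x", "y"))       # ok

    try:
        conn.execute("INSERT INTO demo VALUES (?)", ("x", "y"))      # one ? for two values
    except sqlite3.ProgrammingError as e:
        print(e)  # e.g. "Incorrect number of bindings supplied ..."

    conn.commit()
    conn.close()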

@@ -6,7 +6,7 @@ import flask
 class LLMBackend:
     default_params: dict
 
-    def handle_response(self, request: flask.Request, success, response: flask.Response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
+    def handle_response(self, success, request: flask.Request, response: flask.Response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
         raise NotImplementedError
 
     def validate_params(self, params_dict: dict) -> Tuple[bool, str | None]:
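
Note: because existing callers pass these arguments positionally, swapping `request` and `success` in the base class (and in the overrides below) silently rebinds them at any call site that is not updated, e.g. the Flask request object would land in `success`. A rough stand-in illustrating the hazard, reduced to the two reordered parameters:

    # Stand-in for LLMBackend with the new (success, request) order.
    class Backend:
        def handle_response(self, success, request, *rest):
            print(f"success={success!r}, request={request!r}")

    backend = Backend()

    # A positional call written for the old (request, success) order now binds
    # the request object to `success` and the boolean to `request`:
    backend.handle_response("<flask request>", True)

    # Keyword arguments are immune to the reordering:
    backend.handle_response(success=True, request="<flask request>")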

@@ -9,7 +9,7 @@ from ...routes.helpers.http import validate_json
 
 
 class OobaboogaBackend(LLMBackend):
-    def handle_response(self, request, success, response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
+    def handle_response(self, success, request, response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
         backend_err = False
         response_valid_json, response_json_body = validate_json(response)
         try:

@@ -17,7 +17,7 @@ from llm_server.routes.helpers.http import validate_json
 class VLLMBackend(LLMBackend):
     default_params = vars(SamplingParams())
 
-    def handle_response(self, request, success, response, error_msg, client_ip, token, prompt: str, elapsed_time, parameters, headers):
+    def handle_response(self, success, request, response, error_msg, client_ip, token, prompt: str, elapsed_time, parameters, headers):
         response_valid_json, response_json_body = validate_json(response)
         backend_err = False
         try:

@@ -41,7 +41,7 @@ class VLLMBackend(LLMBackend):
                 #     f'HTTP CODE {response_status_code}'
                 # )
 
-            log_prompt(client_ip, token, prompt, backend_response, elapsed_time if not backend_err else None, parameters, headers, response_status_code, request.url, response_tokens=response_json_body.get('details', {}).get('generated_tokens'), is_error=backend_err)
+            log_prompt(ip=client_ip, token=token, prompt=prompt, response=backend_response, gen_time=elapsed_time if not backend_err else None, parameters=parameters, headers=headers, backend_response_code=response_status_code, request_url=request.url, response_tokens=response_json_body.get('details', {}).get('generated_tokens'), is_error=backend_err)
             return jsonify({'results': [{'text': backend_response}]}), 200
         else:
             backend_response = format_sillytavern_err(f'The backend did not return valid JSON.', 'error')
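
Note: calling `log_prompt` with keyword arguments makes the argument-to-column mapping explicit, which matters for a function with this many parameters, several of them optional; a misplaced positional value would otherwise be written into the wrong column without any error. A minimal sketch (the parameter names mirror the call above; the defaults shown are assumptions):

    # Hypothetical reduced signature mirroring the keyword names used above.
    def log_prompt(ip, token, prompt, response, gen_time, parameters, headers,
                   backend_response_code, request_url, response_tokens=None, is_error=False):
        print(ip, backend_response_code, response_tokens, is_error)

    # Keyword call: each value is tied to its parameter regardless of order.
    log_prompt(
        ip="203.0.113.7",
        token=None,
        prompt="Hello",
        response="Hi there",
        gen_time=1.23,
        parameters={"temperature": 0.7},
        headers={"User-Agent": "test"},
        backend_response_code=200,
        request_url="http://localhost/api/v1/generate",
        response_tokens=4,
    )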

@@ -62,12 +62,12 @@ class OpenAIRequestHandler(RequestHandler):
         elapsed_time = end_time - self.start_time
 
         self.used = True
-        response, response_status_code = self.backend.handle_response(success, backend_response, error_msg, self.client_ip, self.token, self.prompt, elapsed_time, self.parameters, dict(self.request.headers))
+        response, response_status_code = self.backend.handle_response(success=success, request=self.request, response=backend_response, error_msg=error_msg, client_ip=self.client_ip, token=self.token, prompt=self.prompt, elapsed_time=elapsed_time, parameters=self.parameters, headers=dict(self.request.headers))
         return build_openai_response(self.prompt, response.json['results'][0]['text']), 200
 
     def handle_ratelimited(self):
         backend_response = format_sillytavern_err(f'Ratelimited: you are only allowed to have {opts.simultaneous_requests_per_ip} simultaneous requests at a time. Please complete your other requests before sending another.', 'error')
-        log_prompt(self.client_ip, self.token, self.request_json_body.get('prompt', ''), backend_response, None, self.parameters, dict(self.request.headers), 429, self.request.url, is_error=True)
+        log_prompt(ip=self.client_ip, token=self.token, prompt=self.request_json_body.get('prompt', ''), response=backend_response, gen_time=None, parameters=self.parameters, headers=dict(self.request.headers), backend_response_code=429, request_url=self.request.url, is_error=True)
         return build_openai_response(self.prompt, backend_response), 200
 
     def transform_messages_to_prompt(self):
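
Note: besides switching to keywords, the new `handle_response` call also supplies `request=self.request`, which the old nine-argument positional call appears to have left out entirely. With keyword arguments such an omission is reported against the parameter that is actually missing, rather than whichever positional slot runs out last. A rough stand-in:

    # Stand-in with the same parameter list as LLMBackend.handle_response after this commit.
    class Backend:
        def handle_response(self, success, request, response, error_msg, client_ip, token,
                            prompt, elapsed_time, parameters, headers):
            return response, 200

    b = Backend()
    try:
        # Omitting request raises a TypeError naming the missing parameter directly.
        b.handle_response(success=True, response="resp", error_msg=None, client_ip="1.2.3.4",
                          token=None, prompt="p", elapsed_time=0.1, parameters={}, headers={})
    except TypeError as e:
        print(e)  # handle_response() missing 1 required positional argument: 'request'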