didnt test anything

Cyberes 2023-09-13 11:51:46 -06:00
parent 84369d6c78
commit 05a45e6ac6
5 changed files with 7 additions and 7 deletions

View File

@@ -74,7 +74,7 @@ def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backe
     timestamp = int(time.time())
     conn = sqlite3.connect(opts.database_path)
     c = conn.cursor()
-    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
               (ip, token, opts.running_model, opts.mode, opts.backend_url, request_url, gen_time, prompt, prompt_tokens, response, response_tokens, backend_response_code, json.dumps(parameters), json.dumps(headers), timestamp))
     conn.commit()
     conn.close()
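
This hunk fixes a placeholder-count mismatch: the old INSERT listed 13 "?" markers while the parameter tuple supplies 15 values, which makes sqlite3 raise ProgrammingError before anything is written. Below is a minimal, self-contained sketch (using a stand-in three-column table, not the project's real prompts schema) of deriving the placeholder list from the row itself so the two cannot drift apart:

    import sqlite3

    # Minimal sketch: a stand-in table, not the project's real prompts schema.
    conn = sqlite3.connect(':memory:')
    conn.execute('CREATE TABLE prompts (ip TEXT, token TEXT, timestamp INTEGER)')

    row = ('1.2.3.4', 'example-token', 1694627506)

    # Building the VALUES clause from the row keeps the placeholder count and the
    # tuple length in sync; a count mismatch (13 markers vs. 15 values) would
    # raise sqlite3.ProgrammingError instead of inserting the row.
    placeholders = ', '.join('?' for _ in row)
    conn.execute(f'INSERT INTO prompts VALUES ({placeholders})', row)
    conn.commit()
    conn.close()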

View File

@@ -6,7 +6,7 @@ import flask
 class LLMBackend:
     default_params: dict
 
-    def handle_response(self, request: flask.Request, success, response: flask.Response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
+    def handle_response(self, success, request: flask.Request, response: flask.Response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
         raise NotImplementedError
 
     def validate_params(self, params_dict: dict) -> Tuple[bool, str | None]:
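
The base-class signature swaps the order of request and success. Any caller still passing these positionally would silently hand the Flask request object to success and a boolean to request, which is presumably why the call site further down switches to keyword arguments. One defensive option (a sketch only, not what this commit does) is to make the parameters keyword-only so a stale positional call fails immediately:

    import flask

    class LLMBackend:
        default_params: dict

        # Sketch only: the bare * makes every parameter keyword-only (PEP 3102),
        # so a leftover positional call such as handle_response(request, success, ...)
        # raises TypeError instead of silently swapping arguments.
        def handle_response(self, *, success, request: flask.Request, response: flask.Response,
                            error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
            raise NotImplementedError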

View File

@@ -9,7 +9,7 @@ from ...routes.helpers.http import validate_json
 class OobaboogaBackend(LLMBackend):
-    def handle_response(self, request, success, response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
+    def handle_response(self, success, request, response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
         backend_err = False
         response_valid_json, response_json_body = validate_json(response)
         try:

View File

@@ -17,7 +17,7 @@ from llm_server.routes.helpers.http import validate_json
 class VLLMBackend(LLMBackend):
     default_params = vars(SamplingParams())
 
-    def handle_response(self, request, success, response, error_msg, client_ip, token, prompt: str, elapsed_time, parameters, headers):
+    def handle_response(self, success, request, response, error_msg, client_ip, token, prompt: str, elapsed_time, parameters, headers):
         response_valid_json, response_json_body = validate_json(response)
         backend_err = False
         try:
@@ -41,7 +41,7 @@ class VLLMBackend(LLMBackend):
                 # f'HTTP CODE {response_status_code}'
                 # )
-                log_prompt(client_ip, token, prompt, backend_response, elapsed_time if not backend_err else None, parameters, headers, response_status_code, request.url, response_tokens=response_json_body.get('details', {}).get('generated_tokens'), is_error=backend_err)
+                log_prompt(ip=client_ip, token=token, prompt=prompt, response=backend_response, gen_time=elapsed_time if not backend_err else None, parameters=parameters, headers=headers, backend_response_code=response_status_code, request_url=request.url, response_tokens=response_json_body.get('details', {}).get('generated_tokens'), is_error=backend_err)
                 return jsonify({'results': [{'text': backend_response}]}), 200
             else:
                 backend_response = format_sillytavern_err(f'The backend did not return valid JSON.', 'error')
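
Both log_prompt call sites in this commit (here and in the OpenAI handler below) now pass every argument by keyword, so they keep working as the function grows new parameters. One way to check such call sites without executing them is inspect.signature(...).bind(...), sketched here against a hypothetical stand-in for log_prompt's signature (the real definition is truncated in the first hunk header above):

    import inspect

    # Hypothetical stand-in for log_prompt's signature, inferred from the keyword
    # call sites in this commit; the real definition lives in the first changed file.
    def log_prompt(ip, token, prompt, response, gen_time, parameters, headers,
                   backend_response_code, request_url, response_tokens=None, is_error=False):
        pass

    # bind() raises TypeError if the keyword call no longer matches the signature,
    # so a one-line test can catch call-site drift when parameters are added or reordered.
    inspect.signature(log_prompt).bind(ip='1.2.3.4', token=None, prompt='hi', response='ok',
                                       gen_time=1.2, parameters={}, headers={},
                                       backend_response_code=200, request_url='/api',
                                       response_tokens=3, is_error=False)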

View File

@@ -62,12 +62,12 @@ class OpenAIRequestHandler(RequestHandler):
         elapsed_time = end_time - self.start_time
         self.used = True
-        response, response_status_code = self.backend.handle_response(success, backend_response, error_msg, self.client_ip, self.token, self.prompt, elapsed_time, self.parameters, dict(self.request.headers))
+        response, response_status_code = self.backend.handle_response(success=success, request=self.request, response=backend_response, error_msg=error_msg, client_ip=self.client_ip, token=self.token, prompt=self.prompt, elapsed_time=elapsed_time, parameters=self.parameters, headers=dict(self.request.headers))
         return build_openai_response(self.prompt, response.json['results'][0]['text']), 200
 
     def handle_ratelimited(self):
         backend_response = format_sillytavern_err(f'Ratelimited: you are only allowed to have {opts.simultaneous_requests_per_ip} simultaneous requests at a time. Please complete your other requests before sending another.', 'error')
-        log_prompt(self.client_ip, self.token, self.request_json_body.get('prompt', ''), backend_response, None, self.parameters, dict(self.request.headers), 429, self.request.url, is_error=True)
+        log_prompt(ip=self.client_ip, token=self.token, prompt=self.request_json_body.get('prompt', ''), response=backend_response, gen_time=None, parameters=self.parameters, headers=dict(self.request.headers), backend_response_code=429, request_url=self.request.url, is_error=True)
         return build_openai_response(self.prompt, backend_response), 200
 
     def transform_messages_to_prompt(self):