didn't test anything

Cyberes 2023-09-13 11:51:46 -06:00
parent 84369d6c78
commit 05a45e6ac6
5 changed files with 7 additions and 7 deletions

View File

@@ -74,7 +74,7 @@ def log_prompt(ip, token, prompt, response, gen_time, parameters, headers, backe
    timestamp = int(time.time())
    conn = sqlite3.connect(opts.database_path)
    c = conn.cursor()
-    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+    c.execute("INSERT INTO prompts VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
              (ip, token, opts.running_model, opts.mode, opts.backend_url, request_url, gen_time, prompt, prompt_tokens, response, response_tokens, backend_response_code, json.dumps(parameters), json.dumps(headers), timestamp))
    conn.commit()
    conn.close()
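
Note: sqlite3 requires the number of ? placeholders to match the number of values supplied to execute(), which is why the INSERT now carries 15 markers for the 15-element tuple. A minimal, self-contained sketch of that rule (hypothetical table and column names, not the project's real schema):

import sqlite3

values = tuple(range(15))                      # stands in for the 15 logged fields
placeholders = ", ".join("?" * len(values))    # builds "?, ?, ..., ?" to match the value count

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE prompts ({})".format(", ".join(f"c{i}" for i in range(15))))
conn.execute(f"INSERT INTO prompts VALUES ({placeholders})", values)  # mismatched counts raise sqlite3.ProgrammingError
conn.commit()
conn.close()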

View File

@@ -6,7 +6,7 @@ import flask
class LLMBackend:
    default_params: dict
-    def handle_response(self, request: flask.Request, success, response: flask.Response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
+    def handle_response(self, success, request: flask.Request, response: flask.Response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
        raise NotImplementedError
    def validate_params(self, params_dict: dict) -> Tuple[bool, str | None]:
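
Python does not check that an override's signature matches its base class, so the subclasses below and every call site have to switch to the new (success, request, ...) order together. A hedged sketch (hypothetical classes, not the real backends) of what happens when one of them lags behind:

class Base:
    def handle_response(self, success, request):
        raise NotImplementedError

class UpdatedBackend(Base):
    def handle_response(self, success, request):      # follows the new order
        return f"success={success}, request={request}"

class StaleBackend(Base):
    def handle_response(self, request, success):      # still the old order
        return f"success={success}, request={request}"

print(UpdatedBackend().handle_response(True, "<flask.Request>"))  # fields line up
print(StaleBackend().handle_response(True, "<flask.Request>"))    # success and request silently swapped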

View File

@@ -9,7 +9,7 @@ from ...routes.helpers.http import validate_json
class OobaboogaBackend(LLMBackend):
-    def handle_response(self, request, success, response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
+    def handle_response(self, success, request, response, error_msg, client_ip, token, prompt, elapsed_time, parameters, headers):
        backend_err = False
        response_valid_json, response_json_body = validate_json(response)
        try:
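
Both backends unpack validate_json(response) into a (valid, parsed_body) pair. A hypothetical stand-in assuming only that contract (the real helper lives in llm_server/routes/helpers/http and may differ):

import json

def validate_json_sketch(raw_text):
    """Return (True, parsed) when raw_text is valid JSON, else (False, None)."""
    try:
        return True, json.loads(raw_text)
    except (TypeError, json.JSONDecodeError):
        return False, None

response_valid_json, response_json_body = validate_json_sketch('{"results": []}')
print(response_valid_json, response_json_body)  # True {'results': []}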

View File

@@ -17,7 +17,7 @@ from llm_server.routes.helpers.http import validate_json
class VLLMBackend(LLMBackend):
    default_params = vars(SamplingParams())
-    def handle_response(self, request, success, response, error_msg, client_ip, token, prompt: str, elapsed_time, parameters, headers):
+    def handle_response(self, success, request, response, error_msg, client_ip, token, prompt: str, elapsed_time, parameters, headers):
        response_valid_json, response_json_body = validate_json(response)
        backend_err = False
        try:
@@ -41,7 +41,7 @@ class VLLMBackend(LLMBackend):
            # f'HTTP CODE {response_status_code}'
            # )
-            log_prompt(client_ip, token, prompt, backend_response, elapsed_time if not backend_err else None, parameters, headers, response_status_code, request.url, response_tokens=response_json_body.get('details', {}).get('generated_tokens'), is_error=backend_err)
+            log_prompt(ip=client_ip, token=token, prompt=prompt, response=backend_response, gen_time=elapsed_time if not backend_err else None, parameters=parameters, headers=headers, backend_response_code=response_status_code, request_url=request.url, response_tokens=response_json_body.get('details', {}).get('generated_tokens'), is_error=backend_err)
            return jsonify({'results': [{'text': backend_response}]}), 200
        else:
            backend_response = format_sillytavern_err(f'The backend did not return valid JSON.', 'error')
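
The response_tokens argument chains .get() calls so a response without a 'details' object yields None instead of raising KeyError. A short illustration of that pattern:

full = {"details": {"generated_tokens": 42}}
partial = {"text": "..."}                      # no 'details' key at all

print(full.get('details', {}).get('generated_tokens'))     # 42
print(partial.get('details', {}).get('generated_tokens'))  # None, no exception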

View File

@@ -62,12 +62,12 @@ class OpenAIRequestHandler(RequestHandler):
        elapsed_time = end_time - self.start_time
        self.used = True
-        response, response_status_code = self.backend.handle_response(success, backend_response, error_msg, self.client_ip, self.token, self.prompt, elapsed_time, self.parameters, dict(self.request.headers))
+        response, response_status_code = self.backend.handle_response(success=success, request=self.request, response=backend_response, error_msg=error_msg, client_ip=self.client_ip, token=self.token, prompt=self.prompt, elapsed_time=elapsed_time, parameters=self.parameters, headers=dict(self.request.headers))
        return build_openai_response(self.prompt, response.json['results'][0]['text']), 200
    def handle_ratelimited(self):
        backend_response = format_sillytavern_err(f'Ratelimited: you are only allowed to have {opts.simultaneous_requests_per_ip} simultaneous requests at a time. Please complete your other requests before sending another.', 'error')
-        log_prompt(self.client_ip, self.token, self.request_json_body.get('prompt', ''), backend_response, None, self.parameters, dict(self.request.headers), 429, self.request.url, is_error=True)
+        log_prompt(ip=self.client_ip, token=self.token, prompt=self.request_json_body.get('prompt', ''), response=backend_response, gen_time=None, parameters=self.parameters, headers=dict(self.request.headers), backend_response_code=429, request_url=self.request.url, is_error=True)
        return build_openai_response(self.prompt, backend_response), 200
    def transform_messages_to_prompt(self):
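
Passing handle_response and log_prompt arguments by keyword keeps these call sites correct even if the parameter order changes again; only positional callers break. A minimal sketch of the difference, using a hypothetical, trimmed stand-in for log_prompt:

def log_prompt(ip, token, prompt, response, gen_time=None, is_error=False):
    """Hypothetical, trimmed signature; the real function takes more parameters."""
    return {"ip": ip, "token": token, "prompt": prompt,
            "response": response, "gen_time": gen_time, "is_error": is_error}

# Keyword call: stays correct even if the parameter order is shuffled later.
by_keyword = log_prompt(ip="1.2.3.4", token=None, prompt="hi", response="hello",
                        gen_time=None, is_error=True)

# Positional call: only correct for as long as the order above never changes.
by_position = log_prompt("1.2.3.4", None, "hi", "hello", None, True)

print(by_keyword == by_position)  # True today, but only the keyword call survives a reorder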