diff --git a/llm_server/database/create.py b/llm_server/database/create.py
index 3febc78..18f563c 100644
--- a/llm_server/database/create.py
+++ b/llm_server/database/create.py
@@ -12,7 +12,7 @@ def create_db():
         backend_mode text COLLATE pg_catalog."default" NOT NULL,
         backend_url text COLLATE pg_catalog."default" NOT NULL,
         request_url text COLLATE pg_catalog."default" NOT NULL,
-        generation_time double precision NOT NULL,
+        generation_time double precision,
         prompt text COLLATE pg_catalog."default" NOT NULL,
         prompt_tokens integer NOT NULL,
         response text COLLATE pg_catalog."default" NOT NULL,
diff --git a/llm_server/database/database.py b/llm_server/database/database.py
index ffd6296..272b141 100644
--- a/llm_server/database/database.py
+++ b/llm_server/database/database.py
@@ -34,7 +34,7 @@ def do_db_log(ip: str, token: str, prompt: str, response: Union[str, None], gen_
 
     # Sometimes we may want to insert null into the DB, but
     # usually we want to insert a float.
-    if gen_time:
+    if gen_time is not None:
         gen_time = round(gen_time, 3)
     if is_error:
         gen_time = None
diff --git a/llm_server/llm/openai/oai_to_vllm.py b/llm_server/llm/openai/oai_to_vllm.py
index 33a0994..ed3ba51 100644
--- a/llm_server/llm/openai/oai_to_vllm.py
+++ b/llm_server/llm/openai/oai_to_vllm.py
@@ -97,7 +97,7 @@ def return_oai_internal_server_error():
     return jsonify({
         "error": {
             "message": "Internal server error",
-            "type": "auth_subrequest_error",
+            "type": None,
             "param": None,
             "code": "internal_error"
         }
@@ -110,6 +110,6 @@ def return_oai_invalid_request_error(msg: str = None):
             "message": msg,
             "type": "invalid_request_error",
             "param": None,
-            "code": "model_not_found"
+            "code": None
         }
     }), 404
diff --git a/llm_server/routes/openai_request_handler.py b/llm_server/routes/openai_request_handler.py
index 8d5f6d9..062204e 100644
--- a/llm_server/routes/openai_request_handler.py
+++ b/llm_server/routes/openai_request_handler.py
@@ -46,8 +46,6 @@ class OpenAIRequestHandler(RequestHandler):
         self.prompt = transform_messages_to_prompt(oai_messages, disable_openai_handling)
         self.request_json_body = oai_to_vllm(self.request_json_body, stop_hashes=('instruct' not in self.request_json_body['model'].lower()), mode=self.cluster_backend_info['mode'])
 
-        print(self.prompt)
-
         request_valid, invalid_response = self.validate_request()
         if not request_valid:
             return invalid_response
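
Note on the database.py hunk: "if gen_time:" treats a value of 0.0 as falsy, so a legitimate (if unlikely) zero-second generation time would have skipped the rounding branch, while "is not None" only skips the genuine null case. A minimal sketch of the corrected logic follows; normalize_gen_time is a hypothetical helper used for illustration, not a function from this repo:

    def normalize_gen_time(gen_time, is_error):
        # Round real measurements; leave None alone. None is stored as
        # SQL NULL, which the create.py change (dropping NOT NULL from
        # generation_time) now permits.
        if gen_time is not None:
            gen_time = round(gen_time, 3)
        # Error rows deliberately record no generation time.
        if is_error:
            gen_time = None
        return gen_time

    assert normalize_gen_time(0.0, is_error=False) == 0.0  # falsy but valid
    assert normalize_gen_time(1.23456, is_error=False) == 1.235
    assert normalize_gen_time(None, is_error=False) is None
    assert normalize_gen_time(2.5, is_error=True) is None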