From 07d6f6d8e9f36895fc19e6b2fd1699eb9ee091a6 Mon Sep 17 00:00:00 2001
From: Cyberes
Date: Tue, 3 Oct 2023 00:03:39 -0600
Subject: [PATCH] test

---
 llm_server/routes/openai_request_handler.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llm_server/routes/openai_request_handler.py b/llm_server/routes/openai_request_handler.py
index 0dfd558..429dccd 100644
--- a/llm_server/routes/openai_request_handler.py
+++ b/llm_server/routes/openai_request_handler.py
@@ -39,6 +39,7 @@ class OpenAIRequestHandler(RequestHandler):
             return invalid_response
 
         if opts.openai_api_key and is_api_key_moderated(self.token):
+            print('moderating')
             try:
                 # Gather the last message from the user and all preceeding system messages
                 msg_l = self.request.json['messages'].copy()
@@ -59,8 +60,10 @@ class OpenAIRequestHandler(RequestHandler):
                 print(traceback.format_exc())
 
         # TODO: support Ooba
+        print('converting to vllm')
         self.parameters = oai_to_vllm(self.parameters, hashes=True, mode=self.cluster_backend_info['mode'])
 
+        print('generating')
         llm_request = {**self.parameters, 'prompt': self.prompt}
         (success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
 
@@ -70,6 +73,7 @@ class OpenAIRequestHandler(RequestHandler):
             print('sent success response')
             return self.build_openai_response(self.prompt, backend_response.json['results'][0]['text'], model=model), backend_response_status_code
         else:
+            print(backend_response)
             return backend_response, backend_response_status_code
 
     def handle_ratelimited(self, do_log: bool = True):