diff --git a/llm_server/routes/openai_request_handler.py b/llm_server/routes/openai_request_handler.py
index 0b768d1..4b4740c 100644
--- a/llm_server/routes/openai_request_handler.py
+++ b/llm_server/routes/openai_request_handler.py
@@ -25,7 +25,6 @@ class OpenAIRequestHandler(RequestHandler):
         self.prompt = None
 
     def handle_request(self) -> Tuple[flask.Response, int]:
-        print('recieved request')
         assert not self.used
 
         if opts.openai_silent_trim:
@@ -39,7 +38,6 @@ class OpenAIRequestHandler(RequestHandler):
             return invalid_response
 
         if opts.openai_moderation_enabled and opts.openai_api_key and is_api_key_moderated(self.token):
-            print('moderating', self.token)
             try:
                 # Gather the last message from the user and all preceding system messages
                 msg_l = self.request.json['messages'].copy()
@@ -60,17 +58,14 @@ class OpenAIRequestHandler(RequestHandler):
                 traceback.print_exc()
 
         # TODO: support Ooba
-        print('converting to vllm')
         self.parameters = oai_to_vllm(self.parameters, hashes=True, mode=self.cluster_backend_info['mode'])
 
-        print('generating')
        llm_request = {**self.parameters, 'prompt': self.prompt}
 
         (success, _, _, _), (backend_response, backend_response_status_code) = self.generate_response(llm_request)
 
         model = self.request_json_body.get('model')
         if success:
-            print('sent success response')
             return self.build_openai_response(self.prompt, backend_response.json['results'][0]['text'], model=model), backend_response_status_code
         else:
             return backend_response, backend_response_status_code