diff --git a/llm_server/routes/openai/chat_completions.py b/llm_server/routes/openai/chat_completions.py index f45756a..1b9b2d7 100644 --- a/llm_server/routes/openai/chat_completions.py +++ b/llm_server/routes/openai/chat_completions.py @@ -57,6 +57,7 @@ def openai_chat_completions(): else: handler.prompt = transform_messages_to_prompt(handler.request.json['messages']) + generated_text = '' response_status_code = 0 start_time = time.time() @@ -154,3 +155,4 @@ def openai_chat_completions(): # The worker incremented it, we'll decrement it. decrement_ip_count(handler.client_ip, 'processing_ips') decr_active_workers(handler.selected_model, handler.backend_url) + print(len(generated_text))