diff --git a/llm_server/llm/__init__.py b/llm_server/llm/__init__.py
index 5f87c61..73e439f 100644
--- a/llm_server/llm/__init__.py
+++ b/llm_server/llm/__init__.py
@@ -3,13 +3,7 @@ from llm_server.llm import oobabooga, vllm
 
 
 def get_token_count(prompt: str, backend_url: str):
-    assert isinstance(backend_url, str)
-
-    if not prompt:
-        # The tokenizers have issues when the prompt is None.
-        return 0
-    assert isinstance(prompt, str)
-
+    backend_url = cluster_config.validate_backend(backend_url)
     backend_mode = cluster_config.get_backend(backend_url)['mode']
     if backend_mode == 'vllm':
         return vllm.tokenize(prompt, backend_url)
diff --git a/llm_server/routes/openai/chat_completions.py b/llm_server/routes/openai/chat_completions.py
index 0c62df9..e470a7b 100644
--- a/llm_server/routes/openai/chat_completions.py
+++ b/llm_server/routes/openai/chat_completions.py
@@ -150,7 +150,6 @@ def openai_chat_completions():
             # The worker incremented it, we'll decrement it.
             decrement_ip_count(handler.client_ip, 'processing_ips')
             decr_active_workers(handler.selected_model, handler.backend_url)
-            print(len(generated_text))
 
             return Response(generate(), mimetype='text/event-stream')
         except Exception:
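
Note on the first hunk: get_token_count now delegates input checking to cluster_config.validate_backend instead of asserting on its arguments. Only the helper's name appears in the diff; the sketch below is an assumed shape for it (class layout, default-backend fallback, and method bodies are illustrative guesses, not code from the repository).

    # Hypothetical sketch of cluster_config.validate_backend, assuming it maps an
    # empty or unknown backend URL to a cluster default so callers such as
    # get_token_count no longer need their own assertions.
    class ClusterConfig:
        def __init__(self, backends: dict, default_backend: str):
            self._backends = backends          # backend_url -> {'mode': ..., ...}
            self._default = default_backend

        def validate_backend(self, backend_url: str) -> str:
            # Fall back to the default backend when the URL is missing or unknown.
            if not backend_url or backend_url not in self._backends:
                return self._default
            return backend_url

        def get_backend(self, backend_url: str) -> dict:
            return self._backends[backend_url]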