handle backend offline in tokenizer

2023-10-04 13:34:59 -06:00 · 2023-10-04 13:34:59 -06:00 · 62d5d43da4
parent 7acaa3c885
commit 62d5d43da4
1 changed files with 12 additions and 0 deletions
--- a/llm_server/llm/vllm/tokenize.py
+++ b/llm_server/llm/vllm/tokenize.py
@@ -4,6 +4,8 @@ import requests
 import tiktoken

 from llm_server import opts
+from llm_server.cluster.backend import get_a_cluster_backend
+from llm_server.cluster.cluster_config import cluster_config


 def tokenize(prompt: str, backend_url: str) -> int:
@@ -11,6 +13,16 @@ def tokenize(prompt: str, backend_url: str) -> int:
    assert isinstance(prompt, str)
    assert isinstance(backend_url, str)

+    # TODO: put this in a shared function
+    # The backend could have died between when the request was
+    # submitted and now, so let's double check it's still online.
+    backend_info = cluster_config.get_backend(backend_url)
+    if not backend_info['online']:
+        old = backend_url
+        backend_url = get_a_cluster_backend()
+        print(f'Backend {old} offline. Request was redirected to {backend_url}')
+        del old  # gc
+
    if not prompt:
        # The tokenizers have issues when the prompt is None.
        return 0