handle backend offline in tokenizer

Cyberes 2023-10-04 13:34:59 -06:00
parent 7acaa3c885
commit 62d5d43da4
1 changed file with 12 additions and 0 deletions


@@ -4,6 +4,8 @@ import requests
 import tiktoken
 
 from llm_server import opts
+from llm_server.cluster.backend import get_a_cluster_backend
+from llm_server.cluster.cluster_config import cluster_config
 
 
 def tokenize(prompt: str, backend_url: str) -> int:
@@ -11,6 +13,16 @@ def tokenize(prompt: str, backend_url: str) -> int:
     assert isinstance(prompt, str)
     assert isinstance(backend_url, str)
 
+    # TODO: put this in a shared function
+    # The backend could have died between when the request was
+    # submitted and now, so let's double check it's still online.
+    backend_info = cluster_config.get_backend(backend_url)
+    if not backend_info['online']:
+        old = backend_url
+        backend_url = get_a_cluster_backend()
+        print(f'Backend {old} offline. Request was redirected to {backend_url}')
+        del old  # gc
+
     if not prompt:
         # The tokenizers have issues when the prompt is None.
         return 0
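
For context, the added block follows a check-and-redirect pattern: look up the cached status of the backend the request was originally routed to and, if it has gone offline since the request was queued, ask the cluster for any other online backend before tokenizing. Below is a minimal, self-contained sketch of that pattern. The ClusterConfig class, the example URLs, and the resolve_backend helper are assumptions standing in for the real llm_server.cluster modules, which are not part of this diff.

# Minimal sketch of the check-and-redirect pattern used in the diff above.
# ClusterConfig and get_a_cluster_backend are simplified stand-ins (assumptions)
# for llm_server.cluster.cluster_config and llm_server.cluster.backend.

class ClusterConfig:
    def __init__(self):
        # backend_url -> info dict, mirroring the 'online' key read in the diff.
        # The URLs here are purely illustrative.
        self._backends = {
            'http://10.0.0.1:5000': {'online': False},
            'http://10.0.0.2:5000': {'online': True},
        }

    def get_backend(self, backend_url: str) -> dict:
        return self._backends[backend_url]

    def all_backends(self) -> dict:
        return self._backends


cluster_config = ClusterConfig()


def get_a_cluster_backend() -> str:
    # Return any backend currently marked online.
    for url, info in cluster_config.all_backends().items():
        if info['online']:
            return url
    raise RuntimeError('no online backends')


def resolve_backend(backend_url: str) -> str:
    # The backend could have died between when the request was
    # submitted and now, so double check it is still online.
    backend_info = cluster_config.get_backend(backend_url)
    if not backend_info['online']:
        old = backend_url
        backend_url = get_a_cluster_backend()
        print(f'Backend {old} offline. Request was redirected to {backend_url}')
    return backend_url


if __name__ == '__main__':
    # The offline backend is transparently swapped for an online one.
    print(resolve_backend('http://10.0.0.1:5000'))

Factoring the check into a shared helper like resolve_backend is roughly what the TODO in the diff ("put this in a shared function") points toward; the sketch is only meant to illustrate the pattern, not the project's actual module layout.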