handle backend offline in tokenizer
This commit is contained in:
parent
7acaa3c885
commit
62d5d43da4
|
@ -4,6 +4,8 @@ import requests
|
|||
import tiktoken
|
||||
|
||||
from llm_server import opts
|
||||
from llm_server.cluster.backend import get_a_cluster_backend
|
||||
from llm_server.cluster.cluster_config import cluster_config
|
||||
|
||||
|
||||
def tokenize(prompt: str, backend_url: str) -> int:
|
||||
|
@ -11,6 +13,16 @@ def tokenize(prompt: str, backend_url: str) -> int:
|
|||
assert isinstance(prompt, str)
|
||||
assert isinstance(backend_url, str)
|
||||
|
||||
# TODO: put this in a shared function
|
||||
# The backend could have died between when the request was
|
||||
# submitted and now, so let's double check it's still online.
|
||||
backend_info = cluster_config.get_backend(backend_url)
|
||||
if not backend_info['online']:
|
||||
old = backend_url
|
||||
backend_url = get_a_cluster_backend()
|
||||
print(f'Backend {old} offline. Request was redirected to {backend_url}')
|
||||
del old # gc
|
||||
|
||||
if not prompt:
|
||||
# The tokenizers have issues when the prompt is None.
|
||||
return 0
|
||||
|
|
Reference in New Issue