handle backend offline in tokenizer
This commit is contained in:
parent
7acaa3c885
commit
62d5d43da4
|
@ -4,6 +4,8 @@ import requests
|
||||||
import tiktoken
|
import tiktoken
|
||||||
|
|
||||||
from llm_server import opts
|
from llm_server import opts
|
||||||
|
from llm_server.cluster.backend import get_a_cluster_backend
|
||||||
|
from llm_server.cluster.cluster_config import cluster_config
|
||||||
|
|
||||||
|
|
||||||
def tokenize(prompt: str, backend_url: str) -> int:
|
def tokenize(prompt: str, backend_url: str) -> int:
|
||||||
|
@ -11,6 +13,16 @@ def tokenize(prompt: str, backend_url: str) -> int:
|
||||||
assert isinstance(prompt, str)
|
assert isinstance(prompt, str)
|
||||||
assert isinstance(backend_url, str)
|
assert isinstance(backend_url, str)
|
||||||
|
|
||||||
|
# TODO: put this in a shared function
|
||||||
|
# The backend could have died between when the request was
|
||||||
|
# submitted and now, so let's double check it's still online.
|
||||||
|
backend_info = cluster_config.get_backend(backend_url)
|
||||||
|
if not backend_info['online']:
|
||||||
|
old = backend_url
|
||||||
|
backend_url = get_a_cluster_backend()
|
||||||
|
print(f'Backend {old} offline. Request was redirected to {backend_url}')
|
||||||
|
del old # gc
|
||||||
|
|
||||||
if not prompt:
|
if not prompt:
|
||||||
# The tokenizers have issues when the prompt is None.
|
# The tokenizers have issues when the prompt is None.
|
||||||
return 0
|
return 0
|
||||||
|
|
Reference in New Issue