20 lines
604 B
Python
20 lines
604 B
Python
from llm_server.cluster.cluster_config import cluster_config
|
|
from llm_server.llm import oobabooga, vllm
|
|
|
|
|
|
def get_token_count(prompt: str, backend_url: str):
    """Tokenize *prompt* using the tokenizer of the backend at *backend_url*.

    The backend's mode is looked up in ``cluster_config`` and the call is
    forwarded to the matching tokenizer module (``vllm`` or ``oobabooga``).

    Args:
        prompt: Text to tokenize. Any falsy value (``None``, ``""``) is
            treated as an empty prompt.
        backend_url: URL identifying the backend in the cluster config.

    Returns:
        ``0`` for an empty prompt; otherwise whatever the selected
        ``tokenize`` function returns (presumably the token count -- confirm
        against the ``llm_server.llm`` modules).

    Raises:
        TypeError: If ``backend_url`` is not a ``str``, or if a non-empty
            ``prompt`` is not a ``str``.
        ValueError: If the backend's configured mode is not recognised.
    """
    # Explicit raises instead of ``assert`` so the checks survive ``python -O``.
    if not isinstance(backend_url, str):
        raise TypeError(
            f"backend_url must be a str, got {type(backend_url).__name__}"
        )

    if not prompt:
        # The tokenizers have issues when the prompt is None.
        return 0

    if not isinstance(prompt, str):
        raise TypeError(f"prompt must be a str, got {type(prompt).__name__}")

    backend_mode = cluster_config.get_backend(backend_url)['mode']
    if backend_mode == 'vllm':
        return vllm.tokenize(prompt, backend_url)
    if backend_mode == 'ooba':
        # The oobabooga tokenizer is called without the backend URL.
        return oobabooga.tokenize(prompt)

    raise ValueError(
        f"Unknown backend mode {backend_mode!r} for backend {backend_url!r}"
    )
|