local-llm-server/llm_server/llm/__init__.py

from llm_server.cluster.cluster_config import cluster_config
from llm_server.llm import oobabooga, vllm


def get_token_count(prompt: str, backend_url: str):
    """Count the tokens in `prompt` using the tokenizer of the backend at `backend_url`."""
    assert isinstance(backend_url, str)

    if not prompt:
        # The tokenizers have issues when the prompt is None.
        return 0
    assert isinstance(prompt, str)

    backend_mode = cluster_config.get_backend(backend_url)['mode']
    if backend_mode == 'vllm':
        return vllm.tokenize(prompt, backend_url)
    elif backend_mode == 'ooba':
        return oobabooga.tokenize(prompt)
    else:
        raise Exception(backend_mode)
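
A minimal usage sketch, not part of the original file: it assumes a backend has already been registered in cluster_config during server startup, and the backend URL below is a placeholder rather than a value from the repo.

# Usage sketch (assumption: the backend was added to cluster_config elsewhere;
# the URL is a placeholder, not taken from this repository).
from llm_server.llm import get_token_count

backend_url = 'http://127.0.0.1:7000'  # placeholder vLLM backend URL
num_tokens = get_token_count('Hello, world!', backend_url)
print(num_tokens)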