from llm_server.cluster.cluster_config import cluster_config
from llm_server.llm import oobabooga, vllm


def get_token_count(prompt: str, backend_url: str) -> int:
    """Count the tokens in `prompt` using the tokenizer of the backend at `backend_url`."""
    assert isinstance(backend_url, str)

    if not prompt:
        # The tokenizers have issues when the prompt is None or empty.
        return 0
    assert isinstance(prompt, str)

    # Dispatch to the tokenizer that matches the backend's configured mode.
    backend_mode = cluster_config.get_backend(backend_url)['mode']
    if backend_mode == 'vllm':
        return vllm.tokenize(prompt, backend_url)
    elif backend_mode == 'ooba':
        return oobabooga.tokenize(prompt)
    else:
        raise Exception(f'Unknown backend mode: {backend_mode}')