parent 3d0a5cf0a2
commit 5a61bdccd4
@@ -1,15 +1,15 @@
 from llm_server.cluster.cluster_config import cluster_config
 from llm_server.llm import oobabooga, vllm
-from llm_server.custom_redis import redis
 
 
 def get_token_count(prompt: str, backend_url: str):
-    if not isinstance(prompt, str):
-        print(prompt)
-
-    assert isinstance(prompt, str)
     assert isinstance(backend_url, str)
 
+    if not prompt:
+        # The tokenizers have issues when the prompt is None.
+        return 0
+    assert isinstance(prompt, str)
+
     backend_mode = cluster_config.get_backend(backend_url)['mode']
     if backend_mode == 'vllm':
         return vllm.tokenize(prompt, backend_url)
@@ -9,16 +9,17 @@ from llm_server.cluster.cluster_config import cluster_config
 
 def tokenize(prompt: str, backend_url: str) -> int:
     assert backend_url
-    assert isinstance(prompt, str)
     assert isinstance(backend_url, str)
 
+    if not prompt:
+        # The tokenizers have issues when the prompt is None.
+        return 0
+    assert isinstance(prompt, str)
+
     # The backend could have died between when the request was
     # submitted and now, so let's double check it's still online.
     backend_url = cluster_config.validate_backend(backend_url)
 
-    if not prompt:
-        # The tokenizers have issues when the prompt is None.
-        return 0
     tokenizer = tiktoken.get_encoding("cl100k_base")
 
     # Split the prompt into 300 character chunks
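The substance of both hunks is the same reordering: the `if not prompt` guard now runs before the `isinstance(prompt, str)` assert, so a None or empty prompt returns 0 instead of raising an AssertionError (and the debug `print(prompt)` is dropped). A minimal standalone sketch of the new control flow, with the real tokenizer call stubbed out and a made-up backend URL:

def get_token_count(prompt, backend_url: str) -> int:
    assert isinstance(backend_url, str)
    if not prompt:
        # The tokenizers have issues when the prompt is None.
        return 0
    # Only assert the type once we know the prompt is non-empty.
    assert isinstance(prompt, str)
    return len(prompt.split())  # stand-in for the real backend tokenizer call


print(get_token_count(None, 'http://backend:7000'))           # 0, no AssertionError
print(get_token_count('hello world', 'http://backend:7000'))  # 2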
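The second hunk is truncated right after the chunking comment, so the fallback path below is an assumption, not code from the diff: a sketch of counting tokens by splitting the prompt into 300-character chunks and summing the per-chunk tiktoken counts. `count_tokens_chunked` is a hypothetical name; only `tiktoken.get_encoding("cl100k_base")` and the 300-character split appear in the diff.

import tiktoken

def count_tokens_chunked(prompt: str, chunk_size: int = 300) -> int:
    # Hypothetical reconstruction of the truncated fallback path.
    tokenizer = tiktoken.get_encoding("cl100k_base")
    # Split the prompt into 300 character chunks and sum the token counts.
    chunks = [prompt[i:i + chunk_size] for i in range(0, len(prompt), chunk_size)]
    return sum(len(tokenizer.encode(chunk)) for chunk in chunks)

print(count_tokens_chunked('hello world ' * 100))

Chunking bounds the size of each encode call and would let chunks be tokenized in parallel, at the cost of slightly miscounting tokens that straddle a chunk boundary.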