Merge cluster to master #3

Merged
cyberes merged 163 commits from cluster into master 2023-10-27 19:19:22 -06:00
2 changed files with 10 additions and 9 deletions
Showing only changes of commit 5a61bdccd4

View File

@@ -1,15 +1,15 @@
 from llm_server.cluster.cluster_config import cluster_config
 from llm_server.llm import oobabooga, vllm
-from llm_server.custom_redis import redis

 def get_token_count(prompt: str, backend_url: str):
-    if not isinstance(prompt, str):
-        print(prompt)
-    assert isinstance(prompt, str)
     assert isinstance(backend_url, str)
+    if not prompt:
+        # The tokenizers have issues when the prompt is None.
+        return 0
+    assert isinstance(prompt, str)
     backend_mode = cluster_config.get_backend(backend_url)['mode']
     if backend_mode == 'vllm':
         return vllm.tokenize(prompt, backend_url)
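
The point of the reordering: the empty-prompt guard now runs before the isinstance assert, so a None prompt short-circuits to 0 instead of raising. A minimal standalone sketch of the post-commit control flow, with the backend dispatch stubbed out (the URL below is a made-up placeholder):

def get_token_count(prompt, backend_url: str):
    # Checks in the post-commit order: backend_url type first, then the
    # falsy-prompt guard, then the prompt type assert.
    assert isinstance(backend_url, str)
    if not prompt:
        # The tokenizers have issues when the prompt is None.
        return 0
    assert isinstance(prompt, str)
    raise NotImplementedError('backend dispatch elided in this sketch')

print(get_token_count(None, 'http://example:7000'))  # 0 - previously an AssertionError
print(get_token_count('', 'http://example:7000'))    # 0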

View File

@@ -9,16 +9,17 @@ from llm_server.cluster.cluster_config import cluster_config

 def tokenize(prompt: str, backend_url: str) -> int:
     assert backend_url
-    assert isinstance(prompt, str)
     assert isinstance(backend_url, str)
+    if not prompt:
+        # The tokenizers have issues when the prompt is None.
+        return 0
+    assert isinstance(prompt, str)
     # The backend could have died between when the request was
     # submitted and now, so let's double check it's still online.
     backend_url = cluster_config.validate_backend(backend_url)
     tokenizer = tiktoken.get_encoding("cl100k_base")

     # Split the prompt into 300 character chunks
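
The hunk is truncated after the chunking comment. For context, a minimal sketch of what 300-character chunked counting with cl100k_base could look like; this is an assumption based on the comment, not the repo's actual implementation, and count_tokens_chunked is a hypothetical name:

import tiktoken

def count_tokens_chunked(prompt: str, chunk_size: int = 300) -> int:
    # Hypothetical illustration of the strategy named in the comment above.
    tokenizer = tiktoken.get_encoding("cl100k_base")
    chunks = [prompt[i:i + chunk_size] for i in range(0, len(prompt), chunk_size)]
    # Encoding chunks independently bounds per-call work on huge prompts,
    # at the cost of a slight over-count wherever a token straddles a
    # chunk boundary.
    return sum(len(tokenizer.encode(chunk)) for chunk in chunks)

print(count_tokens_chunked('hello world ' * 200))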