use backend handler to build parameters when sending test prompt
parent 90adffaec8
commit be03569165
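Previously the test prompt hand-built its request dict, so its sampling
parameters skipped whatever validation the backend handler applies to
normal requests. Building them through handler.get_parameters() means the
health-check request is shaped the same way as real traffic. Below is a
minimal sketch of the contract the new code relies on; the body of
get_parameters() is a hypothetical illustration, and only the
(parameters, error) return shape is taken from this diff:

    # Hypothetical sketch of the handler contract assumed by test_backend().
    class VLLMBackend:
        def __init__(self, backend_url: str):
            self.backend_url = backend_url

        def get_parameters(self, parameters: dict):
            # Validate/normalize the caller-supplied sampling parameters and
            # return (cleaned_parameters, error); the test-prompt path
            # discards the error slot ("parameters, _ = ...").
            cleaned = {
                'stream': bool(parameters.get('stream', False)),
                'temperature': float(parameters.get('temperature', 1.0)),
                'max_new_tokens': int(parameters.get('max_new_tokens', 16)),
            }
            return cleaned, None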
@@ -6,6 +6,7 @@ from llm_server.cluster.stores import redis_running_models
 from llm_server.custom_redis import redis
 from llm_server.llm.generator import generator
 from llm_server.llm.info import get_info
+from llm_server.llm.vllm.vllm_backend import VLLMBackend
 from llm_server.routes.queue import priority_queue
 from llm_server.routes.stats import calculate_wait_time, get_active_gen_workers_model
 
@@ -33,11 +34,15 @@ def is_valid_model(model_name: str):
 def test_backend(backend_url: str, test_prompt: bool = False):
     backend_info = cluster_config.get_backend(backend_url)
     if test_prompt:
-        data = {
-            "prompt": "Test prompt",
+        handler = VLLMBackend(backend_url)
+        parameters, _ = handler.get_parameters({
             "stream": False,
             "temperature": 0,
             "max_new_tokens": 3,
+        })
+        data = {
+            'prompt': 'test prompt',
+            **parameters
         }
         try:
             success, response, err = generator(data, backend_url, timeout=10)
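One subtlety in the merged dict above: in a dict display, entries
unpacked later win, so a 'prompt' key that get_parameters() happened to
return would silently override the literal test prompt. A small
self-contained illustration of that Python behavior:

    # Later entries win in a dict display, so a 'prompt' key returned by
    # get_parameters() would override the literal test prompt.
    merged = {'prompt': 'test prompt', **{'prompt': 'x', 'temperature': 0}}
    assert merged == {'prompt': 'x', 'temperature': 0}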
@@ -66,7 +66,6 @@ def inference_do_stream(stream_name: str, msg_to_backend: dict, backend_url: str
                 json_obj = json.loads(json_str.decode())
                 new = json_obj['text'][0].split(prompt + generated_text)[1]
                 generated_text = generated_text + new
-                print(new)
             except IndexError:
                 # ????
                 continue