This repository has been archived on 2024-10-27. You can view files and clone it, but cannot push or open issues or pull requests.
2023-08-23 20:12:38 -06:00
|
|
|
from llm_server import opts
|
2023-10-01 16:04:53 -06:00
|
|
|
from llm_server.cluster.cluster_config import cluster_config
|
2023-08-23 20:12:38 -06:00
|
|
|
|
|
|
|
|
2023-09-30 19:41:50 -06:00
|
|
|
def generator(request_json_body, cluster_backend, timeout: int = None):
    """Dispatch a generation request to the implementation for a backend.

    The backend's ``mode`` is read from ``cluster_config`` and selects which
    backend-specific ``generate`` function handles the request.

    Args:
        request_json_body: Request payload, forwarded unchanged to the
            backend's ``generate`` function.
        cluster_backend: Backend identifier; used to look up the backend's
            configuration and forwarded to ``generate``.
        timeout: Optional timeout forwarded to ``generate`` as a keyword
            argument (units are defined by the backend implementation).

    Returns:
        Whatever the selected backend's ``generate`` function returns.

    Raises:
        NotImplementedError: If the backend mode is ``'ooba'``, which has no
            generate implementation wired up here.
        ValueError: If the backend mode is not a recognized value.
    """
    mode = cluster_config.get_backend(cluster_backend)['mode']

    if mode == 'vllm':
        # Kept as a function-local import, as in the original code
        # (presumably to avoid an import cycle / eager load -- confirm).
        from .vllm.generate import generate
        return generate(request_json_body, cluster_backend, timeout=timeout)

    if mode == 'ooba':
        raise NotImplementedError(
            f"generation is not implemented for backend mode 'ooba' "
            f"(backend: {cluster_backend!r})"
        )

    # ValueError is a subclass of Exception, so callers that catch Exception
    # keep working, but the failure now says what was wrong.
    raise ValueError(
        f"unknown backend mode {mode!r} for backend {cluster_backend!r}"
    )
|