from llm_server import opts
from llm_server.cluster.cluster_config import cluster_config


def generator(request_json_body, cluster_backend, timeout: int = None):
    """Dispatch a generation request to the configured backend implementation.

    Looks up the backend's ``mode`` in ``cluster_config`` and routes the
    request to the matching ``generate()`` implementation.

    :param request_json_body: JSON payload forwarded unchanged to the
        backend's ``generate()``.
    :param cluster_backend: Backend identifier passed to
        ``cluster_config.get_backend()``.
    :param timeout: Request timeout forwarded to the vLLM backend; ``None``
        means no explicit timeout. (NOTE(review): presumably seconds —
        confirm against ``.vllm.generate``.)
    :raises NotImplementedError: if the backend mode is ``'ooba'`` (support
        is currently disabled).
    :raises Exception: if the backend mode is not recognized.
    """
    mode = cluster_config.get_backend(cluster_backend)['mode']
    if mode == 'ooba':
        # Oobabooga support is intentionally disabled for now:
        # from .oobabooga.generate import generate
        # return generate(request_json_body)
        raise NotImplementedError
    elif mode == 'vllm':
        # Imported lazily so the vLLM machinery is only loaded when a vLLM
        # backend is actually selected.
        from .vllm.generate import generate
        return generate(request_json_body, cluster_backend, timeout=timeout)
    else:
        # Include the offending mode so a misconfigured backend is
        # diagnosable from the error alone (was a bare `raise Exception`).
        raise Exception(f'Unknown backend mode: {mode}')