from llm_server import opts
from llm_server.cluster.cluster_config import cluster_config


def generator(request_json_body, cluster_backend, timeout: int | None = None):
    """Dispatch a generation request to the handler for the backend's configured mode."""
    mode = cluster_config.get_backend(cluster_backend)['mode']
    if mode == 'ooba':
        # Oobabooga backends are not wired up yet:
        # from .oobabooga.generate import generate
        # return generate(request_json_body)
        raise NotImplementedError
    elif mode == 'vllm':
        from .vllm.generate import generate
        return generate(request_json_body, cluster_backend, timeout=timeout)
    else:
        raise Exception(f'Unknown backend mode: {mode}')
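
# A minimal usage sketch (not part of the original module): the backend key and
# request body below are assumptions for illustration, not values defined here.
# `generator` looks up the backend's mode in cluster_config and forwards the
# request to the matching handler, so the key must already be registered.
#
#   response = generator(
#       {'prompt': 'Once upon a time', 'max_new_tokens': 16},
#       'http://127.0.0.1:8000',  # hypothetical vLLM backend registered in cluster_config
#       timeout=30,
#   )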