import requests

from llm_server import opts


def get_running_model():
    """Ask the configured backend which model it is currently serving.

    Returns (model_name, None) on success, or (False, exception) on failure.
    """
    # TODO: cache the results for 1 min so we don't have to keep calling the backend
    # TODO: only use one try/catch
    if opts.mode == 'oobabooga':
        try:
            backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
            r_json = backend_response.json()
            return r_json['result'], None
        except Exception as e:
            return False, e
    elif opts.mode == 'vllm':
        try:
            backend_response = requests.get(f'{opts.backend_url}/model', timeout=3, verify=opts.verify_ssl)
            r_json = backend_response.json()
            return r_json['model'], None
        except Exception as e:
            return False, e
    else:
        raise Exception(f'Unsupported backend mode: {opts.mode}')
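
# A minimal sketch of the caching TODO above, assuming a 60-second TTL is acceptable.
# The names here (_CACHE_TTL, _model_cache, get_running_model_cached) are hypothetical
# and not part of the existing module; this just memoizes the last successful lookup
# in memory so repeated callers don't hit the backend on every request.
import time

_CACHE_TTL = 60  # seconds to keep a successful result before re-querying the backend
_model_cache = {'value': None, 'expires': 0.0}


def get_running_model_cached():
    # Serve the cached (model, None) tuple while it is still fresh.
    now = time.time()
    if _model_cache['value'] is not None and now < _model_cache['expires']:
        return _model_cache['value']
    result = get_running_model()
    _, err = result
    # Only cache successful lookups; failures fall through so the next call retries.
    if err is None:
        _model_cache['value'] = result
        _model_cache['expires'] = now + _CACHE_TTL
    return result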