local-llm-server/llm_server/llm/info.py


import requests

from llm_server import opts


def get_running_model():
    """Ask the configured backend which model it is currently serving.

    Returns (model_name, None) on success, or (False, exception) if the backend
    could not be reached or returned an unexpected response.
    """
    # TODO: cache the result for 1 min so we don't have to keep calling the backend
    # TODO: only use one try/except
    if opts.mode == 'oobabooga':
        try:
            backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
            r_json = backend_response.json()
            return r_json['result'], None
        except Exception as e:
            return False, e
    elif opts.mode == 'hf-textgen':
        try:
            backend_response = requests.get(f'{opts.backend_url}/info', timeout=3, verify=opts.verify_ssl)
            r_json = backend_response.json()
            # Normalize the HF repo ID (e.g. "org/model") into a path-safe name.
            return r_json['model_id'].replace('/', '_'), None
        except Exception as e:
            return False, e
    elif opts.mode == 'vllm':
        try:
            backend_response = requests.get(f'{opts.backend_url}/model', timeout=3, verify=opts.verify_ssl)
            r_json = backend_response.json()
            return r_json['model'], None
        except Exception as e:
            return False, e
    else:
        raise Exception(f'Unknown backend mode: {opts.mode}')
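
A minimal usage sketch, assuming the module path llm_server.llm.info implied by the file path above; this caller is hypothetical and not part of the repository. It shows the return convention: the second tuple element is None on success and the caught exception on failure.

# Hypothetical caller, not part of info.py.
from llm_server.llm.info import get_running_model

model, error = get_running_model()
if error:
    print(f'Could not reach backend: {error}')
else:
    print(f'Backend is serving: {model}')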