adjust requests timeout, add service file

commit c45e68a8c8 (parent 035c17c48b)
Cyberes 2023-09-14 01:32:49 -06:00
6 changed files with 20 additions and 7 deletions


@@ -9,14 +9,14 @@ def get_running_model():
     if opts.mode == 'oobabooga':
         try:
-            backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
+            backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=10, verify=opts.verify_ssl)
             r_json = backend_response.json()
             return r_json['result'], None
         except Exception as e:
             return False, e
     elif opts.mode == 'vllm':
         try:
-            backend_response = requests.get(f'{opts.backend_url}/model', timeout=3, verify=opts.verify_ssl)
+            backend_response = requests.get(f'{opts.backend_url}/model', timeout=10, verify=opts.verify_ssl)
             r_json = backend_response.json()
             return r_json['model'], None
         except Exception as e:
             return False, e
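A note on the bumped timeouts: requests treats a single timeout value as both the connect and the read limit, and raises requests.exceptions.Timeout when either is exceeded, which the broad except above turns into the (False, e) return. A minimal standalone sketch of the tuple form, which splits the two phases (the 3.05/10 split and the URL are illustrative assumptions, not from this commit):

    import requests

    BACKEND_URL = 'https://127.0.0.1:7000'  # stand-in for opts.backend_url

    try:
        # 3.05 s to establish the connection, 10 s for the backend to reply;
        # a bare timeout=10 applies the same 10 s limit to both phases.
        r = requests.get(f'{BACKEND_URL}/api/v1/model', timeout=(3.05, 10), verify=False)
        print(r.json()['result'])
    except requests.exceptions.Timeout as e:
        print((False, e))  # mirrors the (False, e) return above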


@@ -9,7 +9,7 @@ from llm_server import opts
 def generate(json_data: dict):
     try:
-        r = requests.post(f'{opts.backend_url}/api/v1/generate', json=json_data, verify=opts.verify_ssl)
+        r = requests.post(f'{opts.backend_url}/api/v1/generate', json=json_data, verify=opts.verify_ssl, timeout=120)
     except Exception as e:
         return False, None, f'{e.__class__.__name__}: {e}'
     if r.status_code != 200:
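Before this change the post could block indefinitely on a hung backend; with timeout=120 a stalled generation raises requests.exceptions.ReadTimeout after two minutes, which the except clause formats into the error string. A rough standalone sketch of what a caller then sees (the URL is a stand-in):

    import requests

    def generate(json_data: dict):
        try:
            r = requests.post('https://127.0.0.1:7000/api/v1/generate',
                              json=json_data, verify=False, timeout=120)
        except Exception as e:
            return False, None, f'{e.__class__.__name__}: {e}'
        return True, r, None

    # A stalled backend yields roughly:
    # (False, None, 'ReadTimeout: ... Read timed out. (read timeout=120)')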


@@ -79,7 +79,7 @@ def transform_prompt_to_text(prompt: list):
 def handle_blocking_request(json_data: dict):
     try:
-        r = requests.post(f'{opts.backend_url}/generate', json=prepare_json(json_data), verify=opts.verify_ssl)
+        r = requests.post(f'{opts.backend_url}/generate', json=prepare_json(json_data), verify=opts.verify_ssl, timeout=120)
     except Exception as e:
         return False, None, f'{e.__class__.__name__}: {e}'
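Neither backend path retries on a transient failure; every timeout or connection error is returned to the caller immediately. If that proves too fragile, one option is a small retry wrapper around the post. A hedged sketch (the helper name, attempt count, and backoff are arbitrary choices, not part of this codebase):

    import time
    import requests

    def post_with_retries(url: str, payload: dict, attempts: int = 3) -> requests.Response:
        # Hypothetical helper: retry transient failures with linear backoff.
        for attempt in range(1, attempts + 1):
            try:
                return requests.post(url, json=payload, timeout=120)
            except requests.exceptions.RequestException:
                if attempt == attempts:
                    raise
                time.sleep(2 * attempt)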


@@ -44,7 +44,7 @@ def get_gpu_wh(gpu_id: int):
         "format": "json",
         "options": "absolute|jsonwrap"
     }
-    response = requests.get(f'{opts.netdata_root}/api/v1/data', params=params)
+    response = requests.get(f'{opts.netdata_root}/api/v1/data', params=params, timeout=10)
     data = json.loads(response.text)
     total_power_usage_watts = sum(point[1] for point in data['result']['data'])
     # total_power_usage_watt_hours = round(total_power_usage_watts / 3600, 1)
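The commented-out conversion is the right arithmetic if each point[1] is a one-second power sample in watts: summing N one-second samples gives watt-seconds (joules), and dividing by 3600 converts to watt-hours. A standalone sketch of the same query against netdata's /api/v1/data endpoint (the host and chart id are assumptions for illustration):

    import json
    import requests

    NETDATA_ROOT = 'http://127.0.0.1:19999'  # stand-in for opts.netdata_root

    params = {
        'chart': 'nvidia_smi.gpu0_power',  # hypothetical chart id
        'after': -3600,                    # last hour of samples
        'format': 'json',
        'options': 'absolute|jsonwrap',
    }
    response = requests.get(f'{NETDATA_ROOT}/api/v1/data', params=params, timeout=10)
    data = json.loads(response.text)

    # Each point is [timestamp, value]; summing 1 s samples of watts gives
    # watt-seconds, and dividing by 3600 converts to watt-hours.
    watt_seconds = sum(point[1] for point in data['result']['data'])
    print(round(watt_seconds / 3600, 1))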

other/vllm/vllm.service (new file, 15 lines)

@@ -0,0 +1,15 @@
+[Unit]
+Description=VLLM Backend
+Wants=basic.target
+After=basic.target network.target
+
+[Service]
+User=USERNAME
+Group=USERNAME
+# Can add --disable-log-requests when I know the backend won't crash
+ExecStart=/storage/vllm/venv/bin/python /storage/vllm/api_server.py --model /storage/oobabooga/one-click-installers/text-generation-webui/models/TheBloke_MythoMax-L2-13B-GPTQ/ --host 0.0.0.0 --port 7000 --max-num-batched-tokens 24576
+Restart=always
+RestartSec=2
+
+[Install]
+WantedBy=multi-user.target
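Restart=always with RestartSec=2 has systemd relaunch the backend two seconds after any crash, which fits the comment about leaving request logging on until the backend proves stable. Installing the unit follows the standard systemd workflow (not shown in this commit): copy it to /etc/systemd/system/vllm.service, replace the USERNAME placeholders, then run systemctl daemon-reload followed by systemctl enable --now vllm.service.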


@@ -12,8 +12,6 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
-
-# python api_server.py --model /storage/oobabooga/one-click-installers/text-generation-webui/models/TheBloke_MythoMax-L2-13B-GPTQ/ --host 0.0.0.0 --port 7000 --max-num-batched-tokens 24576
 TIMEOUT_KEEP_ALIVE = 5  # seconds.
 TIMEOUT_TO_PREVENT_DEADLOCK = 1  # seconds.
 
 app = FastAPI()