adjust requests timeout, add service file

Author: Cyberes
Date: 2023-09-14 01:32:49 -06:00
parent 035c17c48b
commit c45e68a8c8
6 changed files with 20 additions and 7 deletions

@@ -9,14 +9,14 @@ def get_running_model():
     if opts.mode == 'oobabooga':
         try:
-            backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=3, verify=opts.verify_ssl)
+            backend_response = requests.get(f'{opts.backend_url}/api/v1/model', timeout=10, verify=opts.verify_ssl)
             r_json = backend_response.json()
             return r_json['result'], None
         except Exception as e:
             return False, e
     elif opts.mode == 'vllm':
         try:
-            backend_response = requests.get(f'{opts.backend_url}/model', timeout=3, verify=opts.verify_ssl)
+            backend_response = requests.get(f'{opts.backend_url}/model', timeout=10, verify=opts.verify_ssl)
             r_json = backend_response.json()
             return r_json['model'], None
         except Exception as e:

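The GET calls above probe the backend for its currently loaded model, and the commit raises their timeout from 3 to 10 seconds. For reference, a minimal sketch of how requests applies that value (the helper name and error handling are illustrative, not part of the repository):

    import requests

    def check_running_model(backend_url: str, verify_ssl: bool = True):
        # Hypothetical helper mirroring the pattern above.
        try:
            # A single number caps both the connect phase and each wait for
            # data from the server; it is not a hard limit on total time.
            # A (connect, read) tuple such as (3, 10) would split the phases.
            r = requests.get(f'{backend_url}/api/v1/model', timeout=10, verify=verify_ssl)
            return r.json()['result'], None
        except requests.exceptions.Timeout as e:
            # Raised when the backend is slower than the timeout allows.
            return False, e
        except requests.exceptions.RequestException as e:
            return False, e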
@@ -9,7 +9,7 @@ from llm_server import opts
 def generate(json_data: dict):
     try:
-        r = requests.post(f'{opts.backend_url}/api/v1/generate', json=json_data, verify=opts.verify_ssl)
+        r = requests.post(f'{opts.backend_url}/api/v1/generate', json=json_data, verify=opts.verify_ssl, timeout=120)
     except Exception as e:
         return False, None, f'{e.__class__.__name__}: {e}'
     if r.status_code != 200:

@@ -79,7 +79,7 @@ def transform_prompt_to_text(prompt: list):
 def handle_blocking_request(json_data: dict):
     try:
-        r = requests.post(f'{opts.backend_url}/generate', json=prepare_json(json_data), verify=opts.verify_ssl)
+        r = requests.post(f'{opts.backend_url}/generate', json=prepare_json(json_data), verify=opts.verify_ssl, timeout=120)
     except Exception as e:
         return False, None, f'{e.__class__.__name__}: {e}'

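Both generation POSTs above get a much longer timeout (120 seconds) than the 10-second status checks, which fits the fact that a full completion can take far longer than a model lookup. One variant worth noting (purely a sketch, not what the commit does) is a (connect, read) tuple, which keeps unreachable-backend failures fast while still tolerating long generations:

    import requests

    # Hypothetical wrapper; the 3 s / 120 s split is an assumption for illustration.
    def post_generate(backend_url: str, json_data: dict, verify_ssl: bool = True):
        # Give up within ~3 s if the backend cannot be reached at all, but
        # wait up to 120 s between bytes while it produces the completion.
        return requests.post(f'{backend_url}/api/v1/generate', json=json_data,
                             verify=verify_ssl, timeout=(3, 120))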
@@ -44,7 +44,7 @@ def get_gpu_wh(gpu_id: int):
         "format": "json",
         "options": "absolute|jsonwrap"
     }
-    response = requests.get(f'{opts.netdata_root}/api/v1/data', params=params)
+    response = requests.get(f'{opts.netdata_root}/api/v1/data', params=params, timeout=10)
     data = json.loads(response.text)
     total_power_usage_watts = sum(point[1] for point in data['result']['data'])
     # total_power_usage_watt_hours = round(total_power_usage_watts / 3600, 1)

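The netdata query above returns one reading per sampled point and the code sums the Watt values; the commented-out line shows the intended conversion to Watt-hours by dividing by 3600, which assumes one sample per second. A tiny worked example (the sample points are invented, but the field layout follows the jsonwrap response parsed above):

    # Each data point is [timestamp, value]; values here are Watts, one per second.
    data = {'result': {'data': [[1694600000, 250.0], [1694600001, 260.0], [1694600002, 255.0]]}}

    total_power_usage_watts = sum(point[1] for point in data['result']['data'])  # 765.0
    # 765 Watt-seconds / 3600 seconds per hour ≈ 0.2 Watt-hours
    total_power_usage_watt_hours = round(total_power_usage_watts / 3600, 1)
    print(total_power_usage_watt_hours)  # 0.2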
other/vllm/vllm.service (new file, 15 additions)

@@ -0,0 +1,15 @@
+[Unit]
+Description=VLLM Backend
+Wants=basic.target
+After=basic.target network.target
+
+[Service]
+User=USERNAME
+Group=USERNAME
+# Can add --disable-log-requests when I know the backend won't crash
+ExecStart=/storage/vllm/venv/bin/python /storage/vllm/api_server.py --model /storage/oobabooga/one-click-installers/text-generation-webui/models/TheBloke_MythoMax-L2-13B-GPTQ/ --host 0.0.0.0 --port 7000 --max-num-batched-tokens 24576
+Restart=always
+RestartSec=2
+
+[Install]
+WantedBy=multi-user.target

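Assuming the unit is installed to /etc/systemd/system/ (a conventional location, not something the commit specifies), it would be registered with the usual systemctl daemon-reload and started at boot via systemctl enable --now vllm.service; Restart=always together with RestartSec=2 tells systemd to bring the API server back up two seconds after any exit.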
@@ -12,8 +12,6 @@ from vllm.engine.async_llm_engine import AsyncLLMEngine
 from vllm.sampling_params import SamplingParams
 from vllm.utils import random_uuid
 # python api_server.py --model /storage/oobabooga/one-click-installers/text-generation-webui/models/TheBloke_MythoMax-L2-13B-GPTQ/ --host 0.0.0.0 --port 7000 --max-num-batched-tokens 24576
 TIMEOUT_KEEP_ALIVE = 5  # seconds.
 TIMEOUT_TO_PREVENT_DEADLOCK = 1  # seconds.
 app = FastAPI()
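The context lines above are from the copied vLLM API server; in vLLM's example server the TIMEOUT_KEEP_ALIVE constant is normally handed to uvicorn as its keep-alive timeout. A rough sketch of that wiring (an assumption here, since the rest of the file is outside this diff):

    import uvicorn

    # Idle keep-alive connections are closed after TIMEOUT_KEEP_ALIVE seconds.
    uvicorn.run(app, host='0.0.0.0', port=7000, timeout_keep_alive=TIMEOUT_KEEP_ALIVE)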