local-llm-server/other/vllm/vllm.service

15 lines
392 B
SYSTEMD

# vllm.service — runs the vLLM API server as the unprivileged "vllm" user.
# Install to /etc/systemd/system/ and enable with: systemctl enable --now vllm

[Unit]
Description=VLLM Backend
# The server binds 0.0.0.0:7000, so wait for the network to be configured.
# NOTE: basic.target ordering is implicit for services (DefaultDependencies=yes),
# so it does not need to be listed here.
Wants=network-online.target
After=network-online.target network.target

[Service]
User=vllm
ExecStart=/storage/vllm/vllm-venv/bin/python3.11 /storage/vllm/api_server.py --model /storage/models/awq/MythoMax-L2-13B-AWQ --quantization awq --host 0.0.0.0 --port 7000 --gpu-memory-utilization 0.95 --max-log-len 100
# Restart on any exit (crash or clean), waiting 2 seconds between attempts.
Restart=always
RestartSec=2

[Install]
WantedBy=multi-user.target