# systemd unit for the Local LLM Proxy Server (gunicorn serving server:app).

[Unit]
Description=Local LLM Proxy Server
Wants=basic.target
# Ordered after, and hard-dependent on, the local LLM daemon unit.
After=basic.target network.target local-llm-daemon.service
Requires=local-llm-daemon.service

[Service]
User=server
Group=server
WorkingDirectory=/srv/server/local-llm-server
# Requests are long-running, so a large pool of workers is needed.
# The pool takes about 3.5G of memory.
ExecStart=/srv/server/local-llm-server/venv/bin/gunicorn \
    --workers 20 \
    --bind 0.0.0.0:5000 \
    server:app \
    --timeout 60 \
    --worker-class gevent \
    -c /srv/server/local-llm-server/other/gconfig.py
Restart=always
RestartSec=2
SyslogIdentifier=local-llm-server

[Install]
WantedBy=multi-user.target