# systemd service unit: runs the Local LLM Proxy (server:app) under gunicorn.

[Unit]
Description=Local LLM Proxy
Wants=basic.target
After=basic.target network.target

[Service]
# Run unprivileged as the dedicated "server" account.
User=server
Group=server
# gunicorn resolves the "server:app" module path relative to this directory.
WorkingDirectory=/srv/server/local-llm-server
# Need a lot of workers since we have long-running requests.
# Takes about 3.5G memory.
ExecStart=/srv/server/local-llm-server/venv/bin/gunicorn \
    --workers 20 \
    --bind 0.0.0.0:5000 \
    server:app \
    --timeout 60 \
    --worker-class gevent
# Always bring the proxy back up, waiting 2 seconds between attempts.
Restart=always
RestartSec=2

[Install]
WantedBy=multi-user.target