# systemd unit for the Local LLM proxy: runs the gunicorn app `server:app`
# from /srv/server/local-llm-server as the `server` user.

[Unit]
Description=Local LLM Proxy Server
Wants=basic.target
# Ordered after, and hard-dependent on, the companion daemon unit.
After=basic.target network.target local-llm-daemon.service
Requires=local-llm-daemon.service

[Service]
User=server
WorkingDirectory=/srv/server/local-llm-server

# Sometimes the old processes aren't terminated when the service is restarted,
# so force-kill any leftover gunicorn processes before starting.
# The leading "-" makes systemd ignore a non-zero exit status: pkill exits 1
# when nothing matches (the normal case on a clean start), which would
# otherwise fail the unit before ExecStart is ever reached.
ExecStartPre=-/usr/bin/pkill -9 -f "/srv/server/local-llm-server/venv/bin/python3 /srv/server/local-llm-server/venv/bin/gunicorn"

# Need a lot of workers since we have long-running requests.
# This takes about 3.5G memory.
ExecStart=/srv/server/local-llm-server/venv/bin/gunicorn \
    --workers 20 \
    --bind 0.0.0.0:5000 \
    server:app \
    --timeout 60 \
    --worker-class gevent \
    --access-logfile '-' \
    --error-logfile '-'

# Always restart the service, waiting 2 seconds between attempts.
Restart=always
RestartSec=2
SyslogIdentifier=local-llm-server

[Install]
WantedBy=multi-user.target