diff --git a/other/vllm/Docker/Dockerfile b/other/vllm/Docker/Dockerfile
index 55111a7..da878ce 100644
--- a/other/vllm/Docker/Dockerfile
+++ b/other/vllm/Docker/Dockerfile
@@ -72,6 +72,8 @@ RUN chown -R apiserver:apiserver /local-llm-server && \
     chown -R apiserver:apiserver /app && \
     chown -R apiserver:apiserver /var/log/app/
 
+RUN chmod +x /app/start.sh
+
 ENV SHELL="/bin/bash"
 
 # SSH
@@ -83,4 +85,4 @@ EXPOSE 7000
 
 # Jupyter
 EXPOSE 8888
-CMD /app/start.sh
\ No newline at end of file
+CMD /app/start.sh
diff --git a/other/vllm/Docker/supervisord.conf b/other/vllm/Docker/supervisord.conf
index bf3c093..c0fc170 100644
--- a/other/vllm/Docker/supervisord.conf
+++ b/other/vllm/Docker/supervisord.conf
@@ -1,7 +1,7 @@
 [supervisord]
 nodaemon=true
 
-[program:api_server]
+[program:vllm_server]
 command=bash /app/start-vllm.sh 2>&1 | tee /var/log/app/vllm.log
 autostart=true
 autorestart=true
@@ -12,7 +12,7 @@ stderr_logfile_maxbytes=0
 user=apiserver
 environment=HOME="/home/apiserver",USER="apiserver"
 
-[program:proxy]
+[program:rathole]
 command=/app/rathole -c /app/client.toml 2>&1 | tee /var/log/app/rathole.log
 autostart=true
 autorestart=true
diff --git a/server.py b/server.py
index 6a9949f..98aaffa 100644
--- a/server.py
+++ b/server.py
@@ -26,18 +26,21 @@ from llm_server.routes.v1 import bp
 from llm_server.stream import init_socketio
 
 # TODO: have the workers handle streaming too
-# TODO: send extra headers when ratelimited?
-# TODO: return 200 when returning formatted sillytavern error
-# TODO: add some sort of loadbalancer to send requests to a group of backends
+# TODO: add backend fallbacks. Backends at the bottom of the list are higher priority and are fallbacks if the upper ones fail
+# TODO: implement background thread to test backends via sending test prompts
+# TODO: if backend fails request, mark it as down
 # TODO: allow setting concurrent gens per-backend
-# TODO: use first backend as default backend
+# TODO: set the max tokens to that of the lowest backend
+# TODO: implement RRD backend loadbalancer option
 # TODO: simulate OpenAI error messages regardless of endpoint
-# TODO: allow setting specific simoltaneous IPs allowed per token
+# TODO: send extra headers when ratelimited?
 # TODO: make sure log_prompt() is used everywhere, including errors and invalid requests
 # TODO: unify logging thread in a function and use async/await instead
 
-# TODO: add more excluding to SYSTEM__ tokens
+# Done, but need to verify
+# TODO: add more excluding to SYSTEM__ tokens
+# TODO: return 200 when returning formatted sillytavern error
 
 try:
     import vllm
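
A minimal sketch of the backend health-check and failover idea described by the new TODOs in server.py ("implement background thread to test backends via sending test prompts", "if backend fails request, mark it as down", backend fallbacks). Everything below is hypothetical and not part of this repository: the names BACKENDS, TEST_PROMPT, check_backends and pick_backend, the endpoint path and the payload fields are assumptions used only to illustrate one possible shape of the feature.

import threading
import time
from typing import Optional

import requests  # assumed to be available; not confirmed by the diff

# Hypothetical backend list; entries further down the list act as
# fallbacks when the ones above them fail (per the new TODO).
BACKENDS = [
    {"url": "http://backend-a:7000/api/v1/generate", "online": True},
    {"url": "http://backend-b:7000/api/v1/generate", "online": True},
]

# Tiny test prompt; the payload shape is an assumption, not the project's real API.
TEST_PROMPT = {"prompt": "ping", "max_new_tokens": 1}


def check_backends(interval: int = 60) -> None:
    """Background loop: send a test prompt to every backend and mark it
    as down when the request fails."""
    while True:
        for backend in BACKENDS:
            try:
                r = requests.post(backend["url"], json=TEST_PROMPT, timeout=10)
                backend["online"] = r.ok
            except requests.RequestException:
                backend["online"] = False
        time.sleep(interval)


def pick_backend() -> Optional[dict]:
    """Return the first online backend, or None if every backend is down."""
    return next((b for b in BACKENDS if b["online"]), None)


# Started once at server startup (e.g. from server.py) as a daemon thread.
threading.Thread(target=check_backends, daemon=True).start()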