diff --git a/other/vllm/Docker/start-container.sh b/other/vllm/Docker/start-container.sh index 7cccb88..0e51cea 100644 --- a/other/vllm/Docker/start-container.sh +++ b/other/vllm/Docker/start-container.sh @@ -16,7 +16,7 @@ if [ -f /storage/vllm/ssh ]; then fi if [ ! -f /storage/vllm/cmd.txt ]; then - echo "--model /storage/vllm/models/model-path --max-num-batched-tokens 4098" >/storage/vllm/cmd.txt + echo "--max-num-batched-tokens 4098 --quantization awq --model /storage/vllm/models/model-path" >/storage/vllm/cmd.txt fi cp /local-llm-server/other/vllm/Docker/idle.ipynb /notebooks/idle.ipynb