# Patch summary (commentary placed ahead of the first `diff --git` header).
#
# Dockerfile
#   * hunk @@ -31,6 +31,7 @@: adds `PORT=80` to the existing ENV block,
#     between SAFETENSORS_FAST_GPU and CUDA_VISIBLE_DEVICES.
#   * hunk @@ -70,4 +71,4 @@: the CMD now also passes `--model-name $MODEL_ID`
#     to text-generation-launcher, ahead of `--json-output`.
#     Both the old and new CMD lines carry "\ No newline at end of file".
#
# aml/deployment.yaml
#   * hunk @@ -11,13 +11,13 @@: under inference_config, the liveness_route,
#     readiness_route and scoring_route ports change from 3000 to 80.
#     Their paths (/health, /health, /generate) are unchanged.
#
# NOTE(review): presumably the launcher/router takes its listen port from the
#   PORT environment variable, which is why the three route ports move to 80
#   together with the new ENV entry -- confirm against the launcher's options.
# NOTE(review): `$MODEL_ID` is unquoted and is not defined in the visible
#   hunks; if it can be unset or empty at container start, `--model-name`
#   would be left without a value -- verify it is always provided.
# NOTE(review): this copy of the patch has its line breaks collapsed (hunk
#   headers and hunk bodies share one line); the original line structure and
#   indentation must be restored before the patch can be applied.
diff --git a/Dockerfile b/Dockerfile index 932d85ac..801f29d4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -31,6 +31,7 @@ ENV LANG=C.UTF-8 \ QUANTIZE=false \ NUM_GPUS=1 \ SAFETENSORS_FAST_GPU=1 \ + PORT=80 \ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ NCCL_ASYNC_ERROR_HANDLING=1 \ CUDA_HOME=/usr/local/cuda \ @@ -70,4 +71,4 @@ COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/loca # Install launcher COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher -CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --json-output \ No newline at end of file +CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --model-name $MODEL_ID --json-output \ No newline at end of file diff --git a/aml/deployment.yaml b/aml/deployment.yaml index 59fdf59a..67690722 100644 --- a/aml/deployment.yaml +++ b/aml/deployment.yaml @@ -11,13 +11,13 @@ environment: image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1 inference_config: liveness_route: - port: 3000 + port: 80 path: /health readiness_route: - port: 3000 + port: 80 path: /health scoring_route: - port: 3000 + port: 80 path: /generate instance_type: Standard_ND96amsr_A100_v4 request_settings: