From ab2ad91da38aec963e25917cf2e3f7688e6600db Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Mon, 23 Jan 2023 17:33:08 +0100
Subject: [PATCH] fix(docker): fix api-inference deployment (#30)

---
Notes (ignored by `git am`):
  * Dockerfile: adds PORT=80 to the ENV block and passes
    `--model-name $MODEL_ID` to text-generation-launcher in CMD.
  * aml/deployment.yaml: liveness, readiness and scoring routes move from
    port 3000 to port 80.
  * NOTE(review): leading whitespace on context lines was reconstructed from
    a whitespace-collapsed copy of this patch; verify against blobs
    932d85a (Dockerfile) and 59fdf59 (aml/deployment.yaml) before applying.

 Dockerfile          | 3 ++-
 aml/deployment.yaml | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 932d85a..801f29d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,6 +31,7 @@ ENV LANG=C.UTF-8 \
     QUANTIZE=false \
     NUM_GPUS=1 \
     SAFETENSORS_FAST_GPU=1 \
+    PORT=80 \
     CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
     NCCL_ASYNC_ERROR_HANDLING=1 \
     CUDA_HOME=/usr/local/cuda \
@@ -70,4 +71,4 @@ COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/loca
 # Install launcher
 COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher
 
-CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --json-output
\ No newline at end of file
+CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --model-name $MODEL_ID --json-output
\ No newline at end of file
diff --git a/aml/deployment.yaml b/aml/deployment.yaml
index 59fdf59..6769072 100644
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -11,13 +11,13 @@ environment:
   image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
   inference_config:
     liveness_route:
-      port: 3000
+      port: 80
       path: /health
     readiness_route:
-      port: 3000
+      port: 80
       path: /health
     scoring_route:
-      port: 3000
+      port: 80
       path: /generate
 instance_type: Standard_ND96amsr_A100_v4
 request_settings: