From ab2ad91da38aec963e25917cf2e3f7688e6600db Mon Sep 17 00:00:00 2001
From: OlivierDehaene <olivier@huggingface.co>
Date: Mon, 23 Jan 2023 17:33:08 +0100
Subject: [PATCH] fix(docker): fix api-inference deployment (#30)

---
 Dockerfile          | 3 ++-
 aml/deployment.yaml | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 932d85a..801f29d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,6 +31,7 @@ ENV LANG=C.UTF-8 \
     QUANTIZE=false \
     NUM_GPUS=1 \
     SAFETENSORS_FAST_GPU=1 \
+    PORT=80 \
     CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
     NCCL_ASYNC_ERROR_HANDLING=1 \
     CUDA_HOME=/usr/local/cuda \
@@ -70,4 +71,4 @@ COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/loca
 # Install launcher
 COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher
 
-CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --json-output
\ No newline at end of file
+CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --model-name $MODEL_ID --json-output
\ No newline at end of file
diff --git a/aml/deployment.yaml b/aml/deployment.yaml
index 59fdf59..6769072 100644
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -11,13 +11,13 @@ environment:
   image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
   inference_config:
     liveness_route:
-      port: 3000
+      port: 80
       path: /health
     readiness_route:
-      port: 3000
+      port: 80
       path: /health
     scoring_route:
-      port: 3000
+      port: 80
       path: /generate
 instance_type: Standard_ND96amsr_A100_v4
 request_settings: