fix(docker): fix api-inference deployment (#30)

2023-01-23 17:33:08 +01:00 · 2023-01-23 17:33:08 +01:00 · ab2ad91da3
parent f9d0ec376a
commit ab2ad91da3
2 changed files with 5 additions and 4 deletions
--- a/3
+++ b/3
@@ -31,6 +31,7 @@ ENV LANG=C.UTF-8 \
    QUANTIZE=false \
    NUM_GPUS=1 \
    SAFETENSORS_FAST_GPU=1 \
+    PORT=80 \
    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
    NCCL_ASYNC_ERROR_HANDLING=1 \
    CUDA_HOME=/usr/local/cuda \
@@ -70,4 +71,4 @@ COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/loca
 # Install launcher
 COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher

-CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --json-output
+CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --model-name $MODEL_ID --json-output
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -11,13 +11,13 @@ environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
  inference_config:
    liveness_route:
-      port: 3000
+      port: 80
      path: /health
    readiness_route:
-      port: 3000
+      port: 80
      path: /health
    scoring_route:
-      port: 3000
+      port: 80
      path: /generate
 instance_type: Standard_ND96amsr_A100_v4
 request_settings: