fix(docker): fix api-inference deployment (#30)

This commit is contained in:
OlivierDehaene 2023-01-23 17:33:08 +01:00 committed by GitHub
parent f9d0ec376a
commit ab2ad91da3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 4 deletions

View File

@@ -31,6 +31,7 @@ ENV LANG=C.UTF-8 \
QUANTIZE=false \
NUM_GPUS=1 \
SAFETENSORS_FAST_GPU=1 \
+PORT=80 \
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
NCCL_ASYNC_ERROR_HANDLING=1 \
CUDA_HOME=/usr/local/cuda \
@@ -70,4 +71,4 @@ COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/loca
# Install launcher
COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher
-CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --json-output
+CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --model-name $MODEL_ID --json-output

View File

@@ -11,13 +11,13 @@ environment:
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
inference_config:
liveness_route:
-port: 3000
+port: 80
path: /health
readiness_route:
-port: 3000
+port: 80
path: /health
scoring_route:
-port: 3000
+port: 80
path: /generate
instance_type: Standard_ND96amsr_A100_v4
request_settings: