fix(docker): fix api-inference deployment (#30)

This commit is contained in:
OlivierDehaene 2023-01-23 17:33:08 +01:00 committed by GitHub
parent f9d0ec376a
commit ab2ad91da3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 4 deletions

View File

@@ -31,6 +31,7 @@ ENV LANG=C.UTF-8 \
QUANTIZE=false \ QUANTIZE=false \
NUM_GPUS=1 \ NUM_GPUS=1 \
SAFETENSORS_FAST_GPU=1 \ SAFETENSORS_FAST_GPU=1 \
PORT=80 \
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
NCCL_ASYNC_ERROR_HANDLING=1 \ NCCL_ASYNC_ERROR_HANDLING=1 \
CUDA_HOME=/usr/local/cuda \ CUDA_HOME=/usr/local/cuda \
@@ -70,4 +71,4 @@ COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/loca
# Install launcher # Install launcher
COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher
CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --json-output CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --model-name $MODEL_ID --json-output

View File

@@ -11,13 +11,13 @@ environment:
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1 image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
inference_config: inference_config:
liveness_route: liveness_route:
port: 3000 port: 80
path: /health path: /health
readiness_route: readiness_route:
port: 3000 port: 80
path: /health path: /health
scoring_route: scoring_route:
port: 3000 port: 80
path: /generate path: /generate
instance_type: Standard_ND96amsr_A100_v4 instance_type: Standard_ND96amsr_A100_v4
request_settings: request_settings: