fix(docker): fix api-inference deployment (#30)
This commit is contained in:
parent
f9d0ec376a
commit
ab2ad91da3
|
@@ -31,6 +31,7 @@ ENV LANG=C.UTF-8 \
|
|||
QUANTIZE=false \
|
||||
NUM_GPUS=1 \
|
||||
SAFETENSORS_FAST_GPU=1 \
|
||||
PORT=80 \
|
||||
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
||||
NCCL_ASYNC_ERROR_HANDLING=1 \
|
||||
CUDA_HOME=/usr/local/cuda \
|
||||
|
@@ -70,4 +71,4 @@ COPY --from=router-builder /usr/local/cargo/bin/text-generation-router /usr/loca
|
|||
# Install launcher
|
||||
COPY --from=launcher-builder /usr/local/cargo/bin/text-generation-launcher /usr/local/bin/text-generation-launcher
|
||||
|
||||
CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --json-output
|
||||
CMD HUGGINGFACE_HUB_CACHE=$MODEL_BASE_PATH text-generation-launcher --num-shard $NUM_GPUS --model-name $MODEL_ID --json-output
|
|
@@ -11,13 +11,13 @@ environment:
|
|||
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
|
||||
inference_config:
|
||||
liveness_route:
|
||||
port: 3000
|
||||
port: 80
|
||||
path: /health
|
||||
readiness_route:
|
||||
port: 3000
|
||||
port: 80
|
||||
path: /health
|
||||
scoring_route:
|
||||
port: 3000
|
||||
port: 80
|
||||
path: /generate
|
||||
instance_type: Standard_ND96amsr_A100_v4
|
||||
request_settings:
|
||||
|
|
Loading…
Reference in New Issue