feat(docker): Make the image compatible with api-inference (#29)
parent 1f570d181f
commit f9d0ec376a
@@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
 ENV LANG=C.UTF-8 \
     LC_ALL=C.UTF-8 \
     DEBIAN_FRONTEND=noninteractive \
-    MODEL_BASE_PATH=/var/azureml-model \
-    MODEL_NAME=bigscience/bloom \
+    MODEL_BASE_PATH=/data \
+    MODEL_ID=bigscience/bloom-560m \
     QUANTIZE=false \
-    NUM_GPUS=8 \
+    NUM_GPUS=1 \
     SAFETENSORS_FAST_GPU=1 \
     CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
     NCCL_ASYNC_ERROR_HANDLING=1 \

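Note: the binary that consumes these variables is not part of this diff. The following is a minimal sketch, assuming an entrypoint that resolves MODEL_ID, MODEL_BASE_PATH, NUM_GPUS and QUANTIZE from the environment with the same fallbacks as the new ENV lines; the real launcher may parse them differently.

use std::env;

fn main() {
    // Hypothetical entrypoint: fallbacks mirror the Dockerfile defaults above.
    let model_id = env::var("MODEL_ID").unwrap_or_else(|_| "bigscience/bloom-560m".to_string());
    let model_base_path = env::var("MODEL_BASE_PATH").unwrap_or_else(|_| "/data".to_string());
    let num_gpus: usize = env::var("NUM_GPUS")
        .ok()
        .and_then(|v| v.parse().ok())
        .unwrap_or(1);
    let quantize = env::var("QUANTIZE").map(|v| v == "true").unwrap_or(false);

    println!("model={model_id} path={model_base_path} gpus={num_gpus} quantize={quantize}");
}
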
@@ -5,7 +5,7 @@ model: azureml:bloom:1
 model_mount_path: /var/azureml-model
 environment_variables:
   MODEL_BASE_PATH: /var/azureml-model/bloom
-  MODEL_NAME: bigscience/bloom
+  MODEL_ID: bigscience/bloom
   NUM_GPUS: 8
 environment:
   image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1

@@ -197,6 +197,7 @@ pub async fn run(
     let app = Router::new()
         .route("/generate", post(generate))
         .layer(Extension(shared_state.clone()))
+        .route("/", get(health))
         .route("/health", get(health))
         .layer(Extension(shared_state.clone()));

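Note: the extra route answers probes on the root path while /health keeps working. A self-contained sketch of the same routing pattern, assuming axum 0.6 and tokio; the handlers and the shared state are stand-ins for the ones defined elsewhere in the router crate.

use axum::{routing::{get, post}, Router};

// Stand-in handlers; the real router wires these to the inference backend.
async fn health() -> &'static str {
    "ok"
}

async fn generate() -> &'static str {
    "generate endpoint"
}

#[tokio::main]
async fn main() {
    // "/" and "/health" point at the same handler, mirroring the hunk above.
    let app = Router::new()
        .route("/generate", post(generate))
        .route("/", get(health))
        .route("/health", get(health));

    axum::Server::bind(&"0.0.0.0:3000".parse().unwrap())
        .serve(app.into_make_service())
        .await
        .unwrap();
}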