feat(docker): Make the image compatible with api-inference (#29)

This commit is contained in:
OlivierDehaene 2023-01-23 17:11:27 +01:00 committed by GitHub
parent 1f570d181f
commit f9d0ec376a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 4 deletions

View File

@@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
ENV LANG=C.UTF-8 \ ENV LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \ LC_ALL=C.UTF-8 \
DEBIAN_FRONTEND=noninteractive \ DEBIAN_FRONTEND=noninteractive \
MODEL_BASE_PATH=/var/azureml-model \ MODEL_BASE_PATH=/data \
MODEL_NAME=bigscience/bloom \ MODEL_ID=bigscience/bloom-560m \
QUANTIZE=false \ QUANTIZE=false \
NUM_GPUS=8 \ NUM_GPUS=1 \
SAFETENSORS_FAST_GPU=1 \ SAFETENSORS_FAST_GPU=1 \
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
NCCL_ASYNC_ERROR_HANDLING=1 \ NCCL_ASYNC_ERROR_HANDLING=1 \

View File

@@ -5,7 +5,7 @@ model: azureml:bloom:1
model_mount_path: /var/azureml-model model_mount_path: /var/azureml-model
environment_variables: environment_variables:
MODEL_BASE_PATH: /var/azureml-model/bloom MODEL_BASE_PATH: /var/azureml-model/bloom
MODEL_NAME: bigscience/bloom MODEL_ID: bigscience/bloom
NUM_GPUS: 8 NUM_GPUS: 8
environment: environment:
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1 image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1

View File

@@ -197,6 +197,7 @@ pub async fn run(
let app = Router::new() let app = Router::new()
.route("/generate", post(generate)) .route("/generate", post(generate))
.layer(Extension(shared_state.clone())) .layer(Extension(shared_state.clone()))
.route("/", get(health))
.route("/health", get(health)) .route("/health", get(health))
.layer(Extension(shared_state.clone())); .layer(Extension(shared_state.clone()));