feat(docker): Make the image compatible with api-inference (#29)

This commit is contained in:
OlivierDehaene 2023-01-23 17:11:27 +01:00 committed by GitHub
parent 1f570d181f
commit f9d0ec376a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 4 deletions

View File

@@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
ENV LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
DEBIAN_FRONTEND=noninteractive \
-MODEL_BASE_PATH=/var/azureml-model \
-MODEL_NAME=bigscience/bloom \
+MODEL_BASE_PATH=/data \
+MODEL_ID=bigscience/bloom-560m \
QUANTIZE=false \
-NUM_GPUS=8 \
+NUM_GPUS=1 \
SAFETENSORS_FAST_GPU=1 \
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
NCCL_ASYNC_ERROR_HANDLING=1 \

View File

@@ -5,7 +5,7 @@ model: azureml:bloom:1
model_mount_path: /var/azureml-model
environment_variables:
MODEL_BASE_PATH: /var/azureml-model/bloom
-MODEL_NAME: bigscience/bloom
+MODEL_ID: bigscience/bloom
NUM_GPUS: 8
environment:
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1

View File

@@ -197,6 +197,7 @@ pub async fn run(
let app = Router::new()
.route("/generate", post(generate))
.layer(Extension(shared_state.clone()))
.route("/", get(health))
.route("/health", get(health))
.layer(Extension(shared_state.clone()));