feat(docker): Make the image compatible with api-inference (#29)
This commit is contained in:
parent
1f570d181f
commit
f9d0ec376a
|
@ -26,10 +26,10 @@ FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
|
||||||
ENV LANG=C.UTF-8 \
|
ENV LANG=C.UTF-8 \
|
||||||
LC_ALL=C.UTF-8 \
|
LC_ALL=C.UTF-8 \
|
||||||
DEBIAN_FRONTEND=noninteractive \
|
DEBIAN_FRONTEND=noninteractive \
|
||||||
MODEL_BASE_PATH=/var/azureml-model \
|
MODEL_BASE_PATH=/data \
|
||||||
MODEL_NAME=bigscience/bloom \
|
MODEL_ID=bigscience/bloom-560m \
|
||||||
QUANTIZE=false \
|
QUANTIZE=false \
|
||||||
NUM_GPUS=8 \
|
NUM_GPUS=1 \
|
||||||
SAFETENSORS_FAST_GPU=1 \
|
SAFETENSORS_FAST_GPU=1 \
|
||||||
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
|
||||||
NCCL_ASYNC_ERROR_HANDLING=1 \
|
NCCL_ASYNC_ERROR_HANDLING=1 \
|
||||||
|
|
|
@ -5,7 +5,7 @@ model: azureml:bloom:1
|
||||||
model_mount_path: /var/azureml-model
|
model_mount_path: /var/azureml-model
|
||||||
environment_variables:
|
environment_variables:
|
||||||
MODEL_BASE_PATH: /var/azureml-model/bloom
|
MODEL_BASE_PATH: /var/azureml-model/bloom
|
||||||
MODEL_NAME: bigscience/bloom
|
MODEL_ID: bigscience/bloom
|
||||||
NUM_GPUS: 8
|
NUM_GPUS: 8
|
||||||
environment:
|
environment:
|
||||||
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
|
image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.3.1
|
||||||
|
|
|
@ -197,6 +197,7 @@ pub async fn run(
|
||||||
let app = Router::new()
|
let app = Router::new()
|
||||||
.route("/generate", post(generate))
|
.route("/generate", post(generate))
|
||||||
.layer(Extension(shared_state.clone()))
|
.layer(Extension(shared_state.clone()))
|
||||||
|
.route("/", get(health))
|
||||||
.route("/health", get(health))
|
.route("/health", get(health))
|
||||||
.layer(Extension(shared_state.clone()));
|
.layer(Extension(shared_state.clone()));
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue