# Azure ML managed online deployment "bloom-deployment", attached to the
# "bloom-inference" endpoint.
$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: bloom-deployment
endpoint_name: bloom-inference

# Registered model (name:version) and the directory it is mounted under.
model: azureml:bloom-safetensors:1
model_mount_path: /var/azureml-model

environment_variables:
  # Points inside model_mount_path, at the mounted model's directory.
  WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors
  MODEL_ID: bigscience/bloom
  # Quoted: environment variable values are strings, not integers.
  NUM_SHARD: "8"

environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0
  # All three routes are served on port 80; liveness and readiness share
  # the same /health path.
  inference_config:
    liveness_route:
      port: 80
      path: /health
    readiness_route:
      port: 80
      path: /health
    scoring_route:
      port: 80
      path: /generate

instance_type: Standard_ND96amsr_A100_v4

request_settings:
  request_timeout_ms: 90000
  max_concurrent_requests_per_instance: 256

# Liveness and readiness probes are configured identically.
# NOTE(review): initial_delay/timeout/period are presumably in seconds --
# confirm against the deployment schema before tuning.
liveness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5

readiness_probe:
  initial_delay: 600
  timeout: 90
  period: 120
  success_threshold: 1
  failure_threshold: 5

instance_count: 1