hf_text-generation-inference/aml/deployment.yaml

$schema: https://azuremlschemas.azureedge.net/latest/managedOnlineDeployment.schema.json
name: bloom-deployment
endpoint_name: bloom-inference
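# The registered safetensors model is mounted into the container at model_mount_path;
# WEIGHTS_CACHE_OVERRIDE points the server at those mounted weights so it does not
# re-download bigscience/bloom, and NUM_SHARD: 8 shards the model across the eight
# A100 GPUs of the Standard_ND96amsr_A100_v4 instance (tensor parallelism).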
model: azureml:bloom-safetensors:1
model_mount_path: /var/azureml-model
environment_variables:
  WEIGHTS_CACHE_OVERRIDE: /var/azureml-model/bloom-safetensors
  MODEL_ID: bigscience/bloom
  NUM_SHARD: 8
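# The text-generation-inference container (pulled here from a private ACR registry)
# serves /health for health checks and /generate for inference; the routes below map
# those onto the managed online endpoint.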
environment:
  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.2.0
  inference_config:
    liveness_route:
      port: 80
      path: /health
    readiness_route:
      port: 80
      path: /health
    scoring_route:
      port: 80
      path: /generate
instance_type: Standard_ND96amsr_A100_v4
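# BLOOM generations can run long, so the request timeout is raised to 60 s; the server
# queues and batches incoming requests, which is why a fairly high per-instance
# concurrency limit is allowed.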
request_settings:
  request_timeout_ms: 60000
  max_concurrent_requests_per_instance: 256
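# Probe settings allow a slow start: the server needs time to load and shard the
# BLOOM weights before /health begins responding.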
liveness_probe:
  initial_delay: 80
  timeout: 60
  period: 60
  success_threshold: 1
  failure_threshold: 2
readiness_probe:
  initial_delay: 80
  timeout: 60
  period: 60
  success_threshold: 1
  failure_threshold: 2
instance_count: 1