diff --git a/aml/deployment.yaml b/aml/deployment.yaml
index 9b90c2e6..fecc6c8f 100644
--- a/aml/deployment.yaml
+++ b/aml/deployment.yaml
@@ -8,7 +8,7 @@ environment_variables:
   MODEL_ID: bigscience/bloom
   NUM_SHARD: 8
 environment:
-  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:sha-4e685d9
+  image: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference:0.4.0
   inference_config:
     liveness_route:
       port: 80
@@ -24,14 +24,14 @@ request_settings:
   request_timeout_ms: 60000
   max_concurrent_requests_per_instance: 256
 liveness_probe:
-  initial_delay: 200
-  timeout: 60
+  initial_delay: 90
+  timeout: 20
   period: 60
   success_threshold: 1
   failure_threshold: 2
 readiness_probe:
-  initial_delay: 200
-  timeout: 60
+  initial_delay: 90
+  timeout: 20
   period: 60
   success_threshold: 1
   failure_threshold: 2