Update grafana template (#1918)

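As per the title: the Grafana dashboard template contained a mistake. In the "Time to first token" panel, the prefill-duration term was missing the `* 1000` factor that was already applied to the queue-duration term.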

credit to @Narsil 

The monitoring tutorial has been updated as well:
https://huggingface.co/docs/text-generation-inference/basic_tutorials/monitoring

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
fxmarty 2024-05-17 17:37:23 +02:00 committed by GitHub
parent c4cf8b49d1
commit 422bf1f986
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 30 additions and 4 deletions

View File

@ -93,7 +93,7 @@
}, },
{ {
"color": "red", "color": "red",
"value": 80 "value": 1000
} }
] ]
}, },
@ -103,7 +103,7 @@
}, },
"gridPos": { "gridPos": {
"h": 7, "h": 7,
"w": 9, "w": 8,
"x": 0, "x": 0,
"y": 0 "y": 0
}, },
@ -132,10 +132,36 @@
"uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "((histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) + histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))))>0 ", "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) > 0",
"hide": true,
"instant": false, "instant": false,
"legendFormat": "__auto",
"range": true, "range": true,
"refId": "A" "refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
},
"editorMode": "code",
"expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))) * 1000) > 0",
"hide": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "C"
},
{
"datasource": {
"name": "Expression",
"type": "__expr__",
"uid": "__expr__"
},
"expression": "$B + $C",
"hide": false,
"refId": "D",
"type": "math"
} }
], ],
"title": "Time to first token", "title": "Time to first token",