Update grafana template (#1918)
As per the title, there was a mistake; credit to @Narsil. Updated https://huggingface.co/docs/text-generation-inference/basic_tutorials/monitoring as well. Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent
c4cf8b49d1
commit
422bf1f986
|
@ -93,7 +93,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"color": "red",
|
"color": "red",
|
||||||
"value": 80
|
"value": 1000
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -103,7 +103,7 @@
|
||||||
},
|
},
|
||||||
"gridPos": {
|
"gridPos": {
|
||||||
"h": 7,
|
"h": 7,
|
||||||
"w": 9,
|
"w": 8,
|
||||||
"x": 0,
|
"x": 0,
|
||||||
"y": 0
|
"y": 0
|
||||||
},
|
},
|
||||||
|
@ -132,10 +132,36 @@
|
||||||
"uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
|
"uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
|
||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"expr": "((histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) + histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))))>0 ",
|
"expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) > 0",
|
||||||
|
"hide": true,
|
||||||
"instant": false,
|
"instant": false,
|
||||||
|
"legendFormat": "__auto",
|
||||||
"range": true,
|
"range": true,
|
||||||
"refId": "A"
|
"refId": "B"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"type": "prometheus",
|
||||||
|
"uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
|
||||||
|
},
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))) * 1000) > 0",
|
||||||
|
"hide": true,
|
||||||
|
"instant": false,
|
||||||
|
"legendFormat": "__auto",
|
||||||
|
"range": true,
|
||||||
|
"refId": "C"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {
|
||||||
|
"name": "Expression",
|
||||||
|
"type": "__expr__",
|
||||||
|
"uid": "__expr__"
|
||||||
|
},
|
||||||
|
"expression": "$B + $C",
|
||||||
|
"hide": false,
|
||||||
|
"refId": "D",
|
||||||
|
"type": "math"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "Time to first token",
|
"title": "Time to first token",
|
||||||
|
|
Loading…
Reference in New Issue