{
"__inputs" : [
{
"name" : "DS_PROMETHEUS_EKS API INFERENCE PROD" ,
"label" : "Prometheus EKS API Inference Prod" ,
"description" : "" ,
"type" : "datasource" ,
"pluginId" : "prometheus" ,
"pluginName" : "Prometheus"
}
] ,
"__elements" : { } ,
"__requires" : [
{
"type" : "panel" ,
"id" : "gauge" ,
"name" : "Gauge" ,
"version" : ""
} ,
{
"type" : "grafana" ,
"id" : "grafana" ,
"name" : "Grafana" ,
"version" : "10.0.2"
} ,
{
"type" : "panel" ,
"id" : "heatmap" ,
"name" : "Heatmap" ,
"version" : ""
} ,
{
"type" : "datasource" ,
"id" : "prometheus" ,
"name" : "Prometheus" ,
"version" : "1.0.0"
} ,
{
"type" : "panel" ,
"id" : "stat" ,
"name" : "Stat" ,
"version" : ""
} ,
{
"type" : "panel" ,
"id" : "timeseries" ,
"name" : "Time series" ,
"version" : ""
}
] ,
"annotations" : {
"list" : [
{
"builtIn" : 1 ,
"datasource" : {
"type" : "grafana" ,
"uid" : "-- Grafana --"
} ,
"enable" : true ,
"hide" : true ,
"iconColor" : "rgba(0, 211, 255, 1)" ,
"name" : "Annotations & Alerts" ,
"target" : {
"limit" : 100 ,
"matchAny" : false ,
"tags" : [ ] ,
"type" : "dashboard"
} ,
"type" : "dashboard"
}
]
} ,
"editable" : true ,
"fiscalYearStartMonth" : 0 ,
"graphTooltip" : 2 ,
"id" : 551 ,
"links" : [ ] ,
"liveNow" : false ,
"panels" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"fieldMinMax" : false ,
"mappings" : [ ] ,
"min" : 0 ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 1000
}
]
} ,
"unit" : "ms"
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 7 ,
"w" : 8 ,
"x" : 0 ,
"y" : 0
} ,
"id" : 49 ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "auto" ,
"orientation" : "auto" ,
"reduceOptions" : {
"calcs" : [
"mean"
] ,
"fields" : "" ,
"values" : false
} ,
"showPercentChange" : false ,
"textMode" : "auto" ,
"wideLayout" : true
} ,
"pluginVersion" : "10.4.2" ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) > 0" ,
"hide" : true ,
"instant" : false ,
"legendFormat" : "__auto" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))) * 1000) > 0" ,
"hide" : true ,
"instant" : false ,
"legendFormat" : "__auto" ,
"range" : true ,
"refId" : "C"
} ,
{
"datasource" : {
"name" : "Expression" ,
"type" : "__expr__" ,
"uid" : "__expr__"
} ,
"expression" : "$B + $C" ,
"hide" : false ,
"refId" : "D" ,
"type" : "math"
}
] ,
"title" : "Time to first token" ,
"type" : "stat"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"min" : 0 ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "ms"
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 7 ,
"w" : 8 ,
"x" : 9 ,
"y" : 0
} ,
"id" : 44 ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "auto" ,
"orientation" : "auto" ,
"reduceOptions" : {
"calcs" : [
"mean"
] ,
"fields" : "" ,
"values" : false
} ,
"showPercentChange" : false ,
"textMode" : "auto" ,
"wideLayout" : true
} ,
"pluginVersion" : "10.4.2" ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m]))) * 1000)>0" ,
"instant" : false ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Decode per-token latency" ,
"type" : "stat"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "thresholds"
} ,
"mappings" : [ ] ,
"min" : 0 ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
}
]
} ,
"unit" : "short"
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 7 ,
"w" : 7 ,
"x" : 17 ,
"y" : 0
} ,
"id" : 45 ,
"options" : {
"colorMode" : "value" ,
"graphMode" : "area" ,
"justifyMode" : "auto" ,
"orientation" : "auto" ,
"reduceOptions" : {
"calcs" : [
"mean"
] ,
"fields" : "" ,
"values" : false
} ,
"showPercentChange" : false ,
"textMode" : "auto" ,
"wideLayout" : true
} ,
"pluginVersion" : "10.4.2" ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "sum(rate(tgi_request_generated_tokens_sum{container=\"$service\"}[10m])) > 0" ,
"instant" : false ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Throughput (generated tok/s)" ,
"type" : "stat"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "none"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 8 ,
"w" : 12 ,
"x" : 0 ,
"y" : 7
} ,
"id" : 48 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Number of tokens per prompt" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "none"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 8 ,
"w" : 12 ,
"x" : 12 ,
"y" : 7
} ,
"id" : 30 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Number of generated tokens per request" ,
"type" : "timeseries"
} ,
{
"collapsed" : false ,
"gridPos" : {
"h" : 1 ,
"w" : 24 ,
"x" : 0 ,
"y" : 15
} ,
"id" : 20 ,
"panels" : [ ] ,
"title" : "General" ,
"type" : "row"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 30 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 8 ,
"w" : 6 ,
"x" : 0 ,
"y" : 16
} ,
"id" : 4 ,
"maxDataPoints" : 100 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "sum(increase(tgi_request_success{container=\"$service\"}[1m]))" ,
"legendFormat" : "Success" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "sum(increase(tgi_request_failure{container=\"$service\"}[1m])) by (err)" ,
"hide" : false ,
"legendFormat" : "Error: {{err}}" ,
"range" : true ,
"refId" : "B"
}
] ,
"title" : "Requests" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 13 ,
"w" : 9 ,
"x" : 6 ,
"y" : 16
} ,
"id" : 6 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Mean Time Per Token quantiles" ,
"type" : "timeseries"
} ,
{
"cards" : { } ,
"color" : {
"cardColor" : "#5794F2" ,
"colorScale" : "linear" ,
"colorScheme" : "interpolateSpectral" ,
"exponent" : 0.5 ,
"min" : 0 ,
"mode" : "opacity"
} ,
"dataFormat" : "tsbuckets" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"custom" : {
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"scaleDistribution" : {
"type" : "linear"
}
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 13 ,
"w" : 9 ,
"x" : 15 ,
"y" : 16
} ,
"heatmap" : { } ,
"hideZeroBuckets" : false ,
"highlightCards" : true ,
"id" : 13 ,
"legend" : {
"show" : false
} ,
"maxDataPoints" : 25 ,
"options" : {
"calculate" : false ,
"calculation" : { } ,
"cellGap" : 2 ,
"cellValues" : { } ,
"color" : {
"exponent" : 0.5 ,
"fill" : "#5794F2" ,
"min" : 0 ,
"mode" : "scheme" ,
"reverse" : false ,
"scale" : "exponential" ,
"scheme" : "Spectral" ,
"steps" : 128
} ,
"exemplars" : {
"color" : "rgba(255,0,255,0.7)"
} ,
"filterValues" : {
"le" : 1e-9
} ,
"legend" : {
"show" : false
} ,
"rowsFrame" : {
"layout" : "auto"
} ,
"showValue" : "never" ,
"tooltip" : {
"mode" : "single" ,
"showColorScale" : false ,
"yHistogram" : false
} ,
"yAxis" : {
"axisPlacement" : "left" ,
"decimals" : 1 ,
"reverse" : false ,
"unit" : "s"
}
} ,
"pluginVersion" : "10.4.2" ,
"reverseYBuckets" : false ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"exemplar" : true ,
"expr" : "sum(increase(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[5m])) by (le)" ,
"format" : "heatmap" ,
"interval" : "" ,
"legendFormat" : "{{ le }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Mean Time Per Token" ,
"tooltip" : {
"show" : true ,
"showHistogram" : false
} ,
"type" : "heatmap" ,
"xAxis" : {
"show" : true
} ,
"yAxis" : {
"decimals" : 1 ,
"format" : "s" ,
"logBase" : 1 ,
"show" : true
} ,
"yBucketBound" : "auto"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "auto" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "percentage" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "orange" ,
"value" : 70
} ,
{
"color" : "red" ,
"value" : 85
}
]
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 5 ,
"w" : 3 ,
"x" : 0 ,
"y" : 24
} ,
"id" : 18 ,
"options" : {
"legend" : {
"calcs" : [ ] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : false
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"pluginVersion" : "9.1.0" ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "count(tgi_request_count{container=\"$service\"})" ,
"legendFormat" : "Replicas" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Number of replicas" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"mappings" : [ ] ,
"thresholds" : {
"mode" : "percentage" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "orange" ,
"value" : 70
} ,
{
"color" : "red" ,
"value" : 85
}
]
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 5 ,
"w" : 3 ,
"x" : 3 ,
"y" : 24
} ,
"id" : 32 ,
"options" : {
"minVizHeight" : 75 ,
"minVizWidth" : 75 ,
"orientation" : "auto" ,
"reduceOptions" : {
"calcs" : [
"lastNotNull"
] ,
"fields" : "" ,
"values" : false
} ,
"showThresholdLabels" : false ,
"showThresholdMarkers" : true ,
"sizing" : "auto"
} ,
"pluginVersion" : "10.4.2" ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "sum(tgi_queue_size{container=\"$service\"})" ,
"legendFormat" : "__auto" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Queue Size" ,
"type" : "gauge"
} ,
{
"collapsed" : false ,
"gridPos" : {
"h" : 1 ,
"w" : 24 ,
"x" : 0 ,
"y" : 29
} ,
"id" : 26 ,
"panels" : [ ] ,
"title" : "Batching" ,
"type" : "row"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "bars" ,
"fillOpacity" : 50 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "normal"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 5 ,
"w" : 6 ,
"x" : 0 ,
"y" : 30
} ,
"id" : 29 ,
"maxDataPoints" : 40 ,
"options" : {
"legend" : {
"calcs" : [ ] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : false
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"pluginVersion" : "9.1.0" ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "avg(tgi_batch_current_max_tokens{container=\"$service\"})" ,
"legendFormat" : "{{ pod }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Max tokens per batch" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "none"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 9 ,
"w" : 4 ,
"x" : 6 ,
"y" : 30
} ,
"id" : 33 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Speculated Tokens" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "none"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 9 ,
"w" : 5 ,
"x" : 10 ,
"y" : 30
} ,
"id" : 46 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Prompt Tokens" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 9 ,
"w" : 9 ,
"x" : 15 ,
"y" : 30
} ,
"id" : 8 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Latency quantiles" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "bars" ,
"fillOpacity" : 50 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "normal"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 4 ,
"w" : 6 ,
"x" : 0 ,
"y" : 35
} ,
"id" : 27 ,
"maxDataPoints" : 40 ,
"options" : {
"legend" : {
"calcs" : [ ] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : false
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"pluginVersion" : "9.1.0" ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "avg(tgi_batch_current_size{container=\"$service\"})" ,
"legendFormat" : "{{ pod }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Batch Size" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 30 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 9 ,
"w" : 6 ,
"x" : 0 ,
"y" : 39
} ,
"id" : 28 ,
"maxDataPoints" : 100 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "sum(increase(tgi_batch_concat{container=\"$service\"}[1m])) by (reason)" ,
"hide" : false ,
"legendFormat" : "Reason: {{ reason }}" ,
"range" : true ,
"refId" : "B"
}
] ,
"title" : "Concatenates" ,
"type" : "timeseries"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 9 ,
"w" : 9 ,
"x" : 6 ,
"y" : 39
} ,
"id" : 31 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Queue quantiles" ,
"type" : "timeseries"
} ,
{
"collapsed" : false ,
"gridPos" : {
"h" : 1 ,
"w" : 24 ,
"x" : 0 ,
"y" : 48
} ,
"id" : 22 ,
"panels" : [ ] ,
"title" : "Prefill" ,
"type" : "row"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 12 ,
"x" : 0 ,
"y" : 49
} ,
"id" : 7 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Prefill Quantiles" ,
"type" : "timeseries"
} ,
{
"cards" : { } ,
"color" : {
"cardColor" : "#5794F2" ,
"colorScale" : "linear" ,
"colorScheme" : "interpolateSpectral" ,
"exponent" : 0.5 ,
"min" : 0 ,
"mode" : "opacity"
} ,
"dataFormat" : "tsbuckets" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"custom" : {
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"scaleDistribution" : {
"type" : "linear"
}
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 12 ,
"x" : 12 ,
"y" : 49
} ,
"heatmap" : { } ,
"hideZeroBuckets" : false ,
"highlightCards" : true ,
"id" : 14 ,
"legend" : {
"show" : false
} ,
"maxDataPoints" : 25 ,
"options" : {
"calculate" : false ,
"calculation" : { } ,
"cellGap" : 2 ,
"cellValues" : { } ,
"color" : {
"exponent" : 0.5 ,
"fill" : "#5794F2" ,
"min" : 0 ,
"mode" : "scheme" ,
"reverse" : false ,
"scale" : "exponential" ,
"scheme" : "Spectral" ,
"steps" : 128
} ,
"exemplars" : {
"color" : "rgba(255,0,255,0.7)"
} ,
"filterValues" : {
"le" : 1e-9
} ,
"legend" : {
"show" : false
} ,
"rowsFrame" : {
"layout" : "auto"
} ,
"showValue" : "never" ,
"tooltip" : {
"mode" : "single" ,
"showColorScale" : false ,
"yHistogram" : false
} ,
"yAxis" : {
"axisPlacement" : "left" ,
"decimals" : 1 ,
"reverse" : false ,
"unit" : "s"
}
} ,
"pluginVersion" : "10.4.2" ,
"reverseYBuckets" : false ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"exemplar" : true ,
"expr" : "sum(increase(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[5m])) by (le)" ,
"format" : "heatmap" ,
"interval" : "" ,
"legendFormat" : "{{ le }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Prefill Latency" ,
"tooltip" : {
"show" : true ,
"showHistogram" : false
} ,
"type" : "heatmap" ,
"xAxis" : {
"show" : true
} ,
"yAxis" : {
"decimals" : 1 ,
"format" : "s" ,
"logBase" : 1 ,
"show" : true
} ,
"yBucketBound" : "auto"
} ,
{
"collapsed" : false ,
"gridPos" : {
"h" : 1 ,
"w" : 24 ,
"x" : 0 ,
"y" : 60
} ,
"id" : 24 ,
"panels" : [ ] ,
"title" : "Decode" ,
"type" : "row"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 12 ,
"x" : 0 ,
"y" : 61
} ,
"id" : 11 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Decode quantiles" ,
"type" : "timeseries"
} ,
{
"cards" : { } ,
"color" : {
"cardColor" : "#5794F2" ,
"colorScale" : "linear" ,
"colorScheme" : "interpolateSpectral" ,
"exponent" : 0.5 ,
"min" : 0 ,
"mode" : "opacity"
} ,
"dataFormat" : "tsbuckets" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"custom" : {
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"scaleDistribution" : {
"type" : "linear"
}
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 12 ,
"x" : 12 ,
"y" : 61
} ,
"heatmap" : { } ,
"hideZeroBuckets" : false ,
"highlightCards" : true ,
"id" : 15 ,
"legend" : {
"show" : false
} ,
"maxDataPoints" : 25 ,
"options" : {
"calculate" : false ,
"calculation" : { } ,
"cellGap" : 2 ,
"cellValues" : { } ,
"color" : {
"exponent" : 0.5 ,
"fill" : "#5794F2" ,
"min" : 0 ,
"mode" : "scheme" ,
"reverse" : false ,
"scale" : "exponential" ,
"scheme" : "Spectral" ,
"steps" : 128
} ,
"exemplars" : {
"color" : "rgba(255,0,255,0.7)"
} ,
"filterValues" : {
"le" : 1e-9
} ,
"legend" : {
"show" : false
} ,
"rowsFrame" : {
"layout" : "auto"
} ,
"showValue" : "never" ,
"tooltip" : {
"mode" : "single" ,
"showColorScale" : false ,
"yHistogram" : false
} ,
"yAxis" : {
"axisPlacement" : "left" ,
"decimals" : 1 ,
"reverse" : false ,
"unit" : "s"
}
} ,
"pluginVersion" : "10.4.2" ,
"reverseYBuckets" : false ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"exemplar" : true ,
"expr" : "sum(increase(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)" ,
"format" : "heatmap" ,
"interval" : "" ,
"legendFormat" : "{{ le }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Decode Latency" ,
"tooltip" : {
"show" : true ,
"showHistogram" : false
} ,
"type" : "heatmap" ,
"xAxis" : {
"show" : true
} ,
"yAxis" : {
"decimals" : 1 ,
"format" : "s" ,
"logBase" : 1 ,
"show" : true
} ,
"yBucketBound" : "auto"
} ,
{
"collapsed" : false ,
"gridPos" : {
"h" : 1 ,
"w" : 24 ,
"x" : 0 ,
"y" : 72
} ,
"id" : 43 ,
"panels" : [ ] ,
"title" : "Debug" ,
"type" : "row"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 0 ,
"y" : 73
} ,
"id" : 38 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Forward quantiles" ,
"type" : "timeseries"
} ,
{
"cards" : { } ,
"color" : {
"cardColor" : "#5794F2" ,
"colorScale" : "linear" ,
"colorScheme" : "interpolateSpectral" ,
"exponent" : 0.5 ,
"min" : 0 ,
"mode" : "opacity"
} ,
"dataFormat" : "tsbuckets" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"custom" : {
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"scaleDistribution" : {
"type" : "linear"
}
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 6 ,
"y" : 73
} ,
"heatmap" : { } ,
"hideZeroBuckets" : false ,
"highlightCards" : true ,
"id" : 35 ,
"legend" : {
"show" : false
} ,
"maxDataPoints" : 25 ,
"options" : {
"calculate" : false ,
"calculation" : { } ,
"cellGap" : 2 ,
"cellValues" : { } ,
"color" : {
"exponent" : 0.5 ,
"fill" : "#5794F2" ,
"min" : 0 ,
"mode" : "scheme" ,
"reverse" : false ,
"scale" : "exponential" ,
"scheme" : "Spectral" ,
"steps" : 128
} ,
"exemplars" : {
"color" : "rgba(255,0,255,0.7)"
} ,
"filterValues" : {
"le" : 1e-9
} ,
"legend" : {
"show" : false
} ,
"rowsFrame" : {
"layout" : "auto"
} ,
"showValue" : "never" ,
"tooltip" : {
"mode" : "single" ,
"showColorScale" : false ,
"yHistogram" : false
} ,
"yAxis" : {
"axisPlacement" : "left" ,
"decimals" : 1 ,
"reverse" : false ,
"unit" : "s"
}
} ,
"pluginVersion" : "10.4.2" ,
"reverseYBuckets" : false ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"exemplar" : true ,
"expr" : "sum(increase(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)" ,
"format" : "heatmap" ,
"interval" : "" ,
"legendFormat" : "{{ le }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Forward Latency" ,
"tooltip" : {
"show" : true ,
"showHistogram" : false
} ,
"type" : "heatmap" ,
"xAxis" : {
"show" : true
} ,
"yAxis" : {
"decimals" : 1 ,
"format" : "s" ,
"logBase" : 1 ,
"show" : true
} ,
"yBucketBound" : "auto"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 12 ,
"y" : 73
} ,
"id" : 34 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Token Decode quantiles" ,
"type" : "timeseries"
} ,
{
"cards" : { } ,
"color" : {
"cardColor" : "#5794F2" ,
"colorScale" : "linear" ,
"colorScheme" : "interpolateSpectral" ,
"exponent" : 0.5 ,
"min" : 0 ,
"mode" : "opacity"
} ,
"dataFormat" : "tsbuckets" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"custom" : {
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"scaleDistribution" : {
"type" : "linear"
}
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 18 ,
"y" : 73
} ,
"heatmap" : { } ,
"hideZeroBuckets" : false ,
"highlightCards" : true ,
"id" : 40 ,
"legend" : {
"show" : false
} ,
"maxDataPoints" : 25 ,
"options" : {
"calculate" : false ,
"calculation" : { } ,
"cellGap" : 2 ,
"cellValues" : { } ,
"color" : {
"exponent" : 0.5 ,
"fill" : "#5794F2" ,
"min" : 0 ,
"mode" : "scheme" ,
"reverse" : false ,
"scale" : "exponential" ,
"scheme" : "Spectral" ,
"steps" : 128
} ,
"exemplars" : {
"color" : "rgba(255,0,255,0.7)"
} ,
"filterValues" : {
"le" : 1e-9
} ,
"legend" : {
"show" : false
} ,
"rowsFrame" : {
"layout" : "auto"
} ,
"showValue" : "never" ,
"tooltip" : {
"mode" : "single" ,
"showColorScale" : false ,
"yHistogram" : false
} ,
"yAxis" : {
"axisPlacement" : "left" ,
"decimals" : 1 ,
"reverse" : false ,
"unit" : "s"
}
} ,
"pluginVersion" : "10.4.2" ,
"reverseYBuckets" : false ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"exemplar" : true ,
"expr" : "sum(increase(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)" ,
"format" : "heatmap" ,
"interval" : "" ,
"legendFormat" : "{{ le }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Token Decode Latency" ,
"tooltip" : {
"show" : true ,
"showHistogram" : false
} ,
"type" : "heatmap" ,
"xAxis" : {
"show" : true
} ,
"yAxis" : {
"decimals" : 1 ,
"format" : "s" ,
"logBase" : 1 ,
"show" : true
} ,
"yBucketBound" : "auto"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 0 ,
"y" : 84
} ,
"id" : 42 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Filter Batch quantiles" ,
"type" : "timeseries"
} ,
{
"cards" : { } ,
"color" : {
"cardColor" : "#5794F2" ,
"colorScale" : "linear" ,
"colorScheme" : "interpolateSpectral" ,
"exponent" : 0.5 ,
"min" : 0 ,
"mode" : "opacity"
} ,
"dataFormat" : "tsbuckets" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"custom" : {
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"scaleDistribution" : {
"type" : "linear"
}
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 6 ,
"y" : 84
} ,
"heatmap" : { } ,
"hideZeroBuckets" : false ,
"highlightCards" : true ,
"id" : 39 ,
"legend" : {
"show" : false
} ,
"maxDataPoints" : 25 ,
"options" : {
"calculate" : false ,
"calculation" : { } ,
"cellGap" : 2 ,
"cellValues" : { } ,
"color" : {
"exponent" : 0.5 ,
"fill" : "#5794F2" ,
"min" : 0 ,
"mode" : "scheme" ,
"reverse" : false ,
"scale" : "exponential" ,
"scheme" : "Spectral" ,
"steps" : 128
} ,
"exemplars" : {
"color" : "rgba(255,0,255,0.7)"
} ,
"filterValues" : {
"le" : 1e-9
} ,
"legend" : {
"show" : false
} ,
"rowsFrame" : {
"layout" : "auto"
} ,
"showValue" : "never" ,
"tooltip" : {
"mode" : "single" ,
"showColorScale" : false ,
"yHistogram" : false
} ,
"yAxis" : {
"axisPlacement" : "left" ,
"decimals" : 1 ,
"reverse" : false ,
"unit" : "s"
}
} ,
"pluginVersion" : "10.4.2" ,
"reverseYBuckets" : false ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"exemplar" : true ,
"expr" : "sum(increase(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)" ,
"format" : "heatmap" ,
"interval" : "" ,
"legendFormat" : "{{ le }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Filter Batch Latency" ,
"tooltip" : {
"show" : true ,
"showHistogram" : false
} ,
"type" : "heatmap" ,
"xAxis" : {
"show" : true
} ,
"yAxis" : {
"decimals" : 1 ,
"format" : "s" ,
"logBase" : 1 ,
"show" : true
} ,
"yBucketBound" : "auto"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"color" : {
"mode" : "palette-classic"
} ,
"custom" : {
"axisBorderShow" : false ,
"axisCenteredZero" : false ,
"axisColorMode" : "text" ,
"axisLabel" : "" ,
"axisPlacement" : "auto" ,
"barAlignment" : 0 ,
"drawStyle" : "line" ,
"fillOpacity" : 0 ,
"gradientMode" : "none" ,
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"insertNulls" : false ,
"lineInterpolation" : "linear" ,
"lineWidth" : 1 ,
"pointSize" : 5 ,
"scaleDistribution" : {
"type" : "linear"
} ,
"showPoints" : "never" ,
"spanNulls" : false ,
"stacking" : {
"group" : "A" ,
"mode" : "none"
} ,
"thresholdsStyle" : {
"mode" : "off"
}
} ,
"mappings" : [ ] ,
"thresholds" : {
"mode" : "absolute" ,
"steps" : [
{
"color" : "green" ,
"value" : null
} ,
{
"color" : "red" ,
"value" : 80
}
]
} ,
"unit" : "s"
} ,
"overrides" : [
{
"matcher" : {
"id" : "byName" ,
"options" : "p50"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "green" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p90"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "orange" ,
"mode" : "fixed"
}
}
]
} ,
{
"matcher" : {
"id" : "byName" ,
"options" : "p99"
} ,
"properties" : [
{
"id" : "color" ,
"value" : {
"fixedColor" : "red" ,
"mode" : "fixed"
}
}
]
}
]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 12 ,
"y" : 84
} ,
"id" : 36 ,
"options" : {
"legend" : {
"calcs" : [
"min" ,
"max"
] ,
"displayMode" : "list" ,
"placement" : "bottom" ,
"showLegend" : true
} ,
"tooltip" : {
"mode" : "single" ,
"sort" : "none"
}
} ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"legendFormat" : "p50" ,
"range" : true ,
"refId" : "A"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p90" ,
"range" : true ,
"refId" : "B"
} ,
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"expr" : "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))" ,
"hide" : false ,
"legendFormat" : "p99" ,
"range" : true ,
"refId" : "C"
}
] ,
"title" : "Batch Concat quantiles" ,
"type" : "timeseries"
} ,
{
"cards" : { } ,
"color" : {
"cardColor" : "#5794F2" ,
"colorScale" : "linear" ,
"colorScheme" : "interpolateSpectral" ,
"exponent" : 0.5 ,
"min" : 0 ,
"mode" : "opacity"
} ,
"dataFormat" : "tsbuckets" ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"fieldConfig" : {
"defaults" : {
"custom" : {
"hideFrom" : {
"legend" : false ,
"tooltip" : false ,
"viz" : false
} ,
"scaleDistribution" : {
"type" : "linear"
}
}
} ,
"overrides" : [ ]
} ,
"gridPos" : {
"h" : 11 ,
"w" : 6 ,
"x" : 18 ,
"y" : 84
} ,
"heatmap" : { } ,
"hideZeroBuckets" : false ,
"highlightCards" : true ,
"id" : 41 ,
"legend" : {
"show" : false
} ,
"maxDataPoints" : 25 ,
"options" : {
"calculate" : false ,
"calculation" : { } ,
"cellGap" : 2 ,
"cellValues" : { } ,
"color" : {
"exponent" : 0.5 ,
"fill" : "#5794F2" ,
"min" : 0 ,
"mode" : "scheme" ,
"reverse" : false ,
"scale" : "exponential" ,
"scheme" : "Spectral" ,
"steps" : 128
} ,
"exemplars" : {
"color" : "rgba(255,0,255,0.7)"
} ,
"filterValues" : {
"le" : 1e-9
} ,
"legend" : {
"show" : false
} ,
"rowsFrame" : {
"layout" : "auto"
} ,
"showValue" : "never" ,
"tooltip" : {
"mode" : "single" ,
"showColorScale" : false ,
"yHistogram" : false
} ,
"yAxis" : {
"axisPlacement" : "left" ,
"decimals" : 1 ,
"reverse" : false ,
"unit" : "s"
}
} ,
"pluginVersion" : "10.4.2" ,
"reverseYBuckets" : false ,
"targets" : [
{
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"editorMode" : "code" ,
"exemplar" : true ,
"expr" : "sum(increase(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)" ,
"format" : "heatmap" ,
"interval" : "" ,
"legendFormat" : "{{ le }}" ,
"range" : true ,
"refId" : "A"
}
] ,
"title" : "Batch Concat latency" ,
"tooltip" : {
"show" : true ,
"showHistogram" : false
} ,
"type" : "heatmap" ,
"xAxis" : {
"show" : true
} ,
"yAxis" : {
"decimals" : 1 ,
"format" : "s" ,
"logBase" : 1 ,
"show" : true
} ,
"yBucketBound" : "auto"
}
] ,
"refresh" : "" ,
"schemaVersion" : 39 ,
"tags" : [ ] ,
"templating" : {
"list" : [
{
"current" : {
"selected" : false ,
"text" : "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1" ,
"value" : "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1"
} ,
"datasource" : {
"type" : "prometheus" ,
"uid" : "${DS_PROMETHEUS_EKS API INFERENCE PROD}"
} ,
"definition" : "label_values(tgi_request_count, container)" ,
"hide" : 0 ,
"includeAll" : false ,
"multi" : false ,
"name" : "service" ,
"options" : [ ] ,
"query" : {
"query" : "label_values(tgi_request_count, container)" ,
"refId" : "StandardVariableQuery"
} ,
"refresh" : 1 ,
"regex" : "" ,
"skipUrlSync" : false ,
"sort" : 1 ,
"type" : "query"
}
]
} ,
"time" : {
"from" : "now-30m" ,
"to" : "now-30s"
} ,
"timepicker" : {
"nowDelay" : "30s"
} ,
"timezone" : "" ,
"title" : "Text Generation Inference" ,
"uid" : "RHSk7EL4kdqsd" ,
"version" : 12 ,
"weekStart" : ""
}