{ "__inputs": [ { "name": "DS_PROMETHEUS_EKS API INFERENCE PROD", "label": "Prometheus EKS API Inference Prod", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" } ], "__elements": {}, "__requires": [ { "type": "panel", "id": "gauge", "name": "Gauge", "version": "" }, { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "10.0.2" }, { "type": "panel", "id": "heatmap", "name": "Heatmap", "version": "" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }, { "type": "panel", "id": "timeseries", "name": "Time series", "version": "" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 2, "id": 551, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "fieldMinMax": false, "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 1000 } ] }, "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 0, "y": 0 }, "id": 49, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "mean" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m]))) * 1000) > 0", "hide": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m]))) * 1000) > 0", "hide": true, "instant": false, "legendFormat": "__auto", "range": true, "refId": "C" }, { "datasource": { "name": "Expression", "type": "__expr__", "uid": "__expr__" }, "expression": "$B + $C", "hide": false, "refId": "D", "type": "math" } ], "title": "Time to first token", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 7, "w": 8, "x": 9, "y": 0 }, "id": 44, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "mean" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "(histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m]))) * 1000)>0", "instant": false, "range": true, "refId": "A" } ], "title": "Decode per-token latency", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 7, "w": 7, "x": 17, "y": 0 }, "id": 45, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "mean" ], "fields": "", "values": false }, "showPercentChange": false, "textMode": "auto", "wideLayout": true }, "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "sum((rate(tgi_request_generated_tokens_sum{container=\"$service\"}[10m]) / rate(tgi_request_generated_tokens_count{container=\"$service\"}[10m]))>0)", "instant": false, "range": true, "refId": "A" } ], "title": "Throughput (generated tok/s)", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "none" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 7 }, "id": 48, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Number of tokens per prompt", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "none" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 7 }, "id": 30, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_generated_tokens_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Number of generated tokens per request", "type": "timeseries" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 15 }, "id": 20, "panels": [], "title": "General", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 8, "w": 6, "x": 0, "y": 16 }, "id": 4, "maxDataPoints": 100, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "sum(increase(tgi_request_success{container=\"$service\"}[1m]))", "legendFormat": "Success", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "sum(increase(tgi_request_failure{container=\"$service\"}[1m])) by (err)", "hide": false, "legendFormat": "Error: {{err}}", "range": true, "refId": "B" } ], "title": "Requests", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 13, "w": 9, "x": 6, "y": 16 }, "id": 6, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Mean Time Per Token quantiles", "type": "timeseries" }, { "cards": {}, "color": { "cardColor": "#5794F2", "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, "min": 0, "mode": "opacity" }, "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 13, "w": 9, "x": 15, "y": 16 }, "heatmap": {}, "hideZeroBuckets": false, "highlightCards": true, "id": 13, "legend": { "show": false }, "maxDataPoints": 25, "options": { "calculate": false, "calculation": {}, "cellGap": 2, "cellValues": {}, "color": { "exponent": 0.5, "fill": "#5794F2", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": false }, "rowsFrame": { "layout": "auto" }, "showValue": "never", "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "decimals": 1, "reverse": false, "unit": "s" } }, "pluginVersion": "10.4.2", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "exemplar": true, "expr": "sum(increase(tgi_request_mean_time_per_token_duration_bucket{container=\"$service\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", "range": true, "refId": "A" } ], "title": "Mean Time Per Token", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "yAxis": { "decimals": 1, "format": "s", "logBase": 1, "show": true }, "yBucketBound": "auto" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "orange", "value": 70 }, { "color": "red", "value": 85 } ] } }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 0, "y": 24 }, "id": 18, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "9.1.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "count(tgi_request_count{container=\"$service\"})", "legendFormat": "Replicas", "range": true, "refId": "A" } ], "title": "Number of replicas", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "percentage", "steps": [ { "color": "green", "value": null }, { "color": "orange", "value": 70 }, { "color": "red", "value": 85 } ] } }, "overrides": [] }, "gridPos": { "h": 5, "w": 3, "x": 3, "y": 24 }, "id": 32, "options": { "minVizHeight": 75, "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true, "sizing": "auto" }, "pluginVersion": "10.4.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "sum(tgi_queue_size{container=\"$service\"})", "legendFormat": "__auto", "range": true, "refId": "A" } ], "title": "Queue Size", "type": "gauge" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 29 }, "id": 26, "panels": [], "title": "Batching", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "bars", "fillOpacity": 50, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 5, "w": 6, "x": 0, "y": 30 }, "id": 29, "maxDataPoints": 40, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "9.1.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "avg(tgi_batch_current_max_tokens{container=\"$service\"})", "legendFormat": "{{ pod }}", "range": true, "refId": "A" } ], "title": "Max tokens per batch", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "none" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 9, "w": 4, "x": 6, "y": 30 }, "id": 33, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_skipped_tokens_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Speculated Tokens", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "none" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 9, "w": 5, "x": 10, "y": 30 }, "id": 46, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_input_length_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Prompt Tokens", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 9, "w": 9, "x": 15, "y": 30 }, "id": 8, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_duration_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Latency quantiles", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "bars", "fillOpacity": 50, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 4, "w": 6, "x": 0, "y": 35 }, "id": 27, "maxDataPoints": 40, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": false }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "9.1.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "avg(tgi_batch_current_size{container=\"$service\"})", "legendFormat": "{{ pod }}", "range": true, "refId": "A" } ], "title": "Batch Size", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 30, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "h": 9, "w": 6, "x": 0, "y": 39 }, "id": 28, "maxDataPoints": 100, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "sum(increase(tgi_batch_concat{container=\"$service\"}[1m])) by (reason)", "hide": false, "legendFormat": "Reason: {{ reason }}", "range": true, "refId": "B" } ], "title": "Concatenates", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 9, "w": 9, "x": 6, "y": 39 }, "id": 31, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_request_queue_duration_bucket{container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Queue quantiles", "type": "timeseries" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 48 }, "id": 22, "panels": [], "title": "Prefill", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 11, "w": 12, "x": 0, "y": 49 }, "id": 7, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Prefill Quantiles", "type": "timeseries" }, { "cards": {}, "color": { "cardColor": "#5794F2", "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, "min": 0, "mode": "opacity" }, "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 11, "w": 12, "x": 12, "y": 49 }, "heatmap": {}, "hideZeroBuckets": false, "highlightCards": true, "id": 14, "legend": { "show": false }, "maxDataPoints": 25, "options": { "calculate": false, "calculation": {}, "cellGap": 2, "cellValues": {}, "color": { "exponent": 0.5, "fill": "#5794F2", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": false }, "rowsFrame": { "layout": "auto" }, "showValue": "never", "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "decimals": 1, "reverse": false, "unit": "s" } }, "pluginVersion": "10.4.2", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "exemplar": true, "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"prefill\", container=\"$service\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", "range": true, "refId": "A" } ], "title": "Prefill Latency", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "yAxis": { "decimals": 1, "format": "s", "logBase": 1, "show": true }, "yBucketBound": "auto" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 60 }, "id": 24, "panels": [], "title": "Decode", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 11, "w": 12, "x": 0, "y": 61 }, "id": 11, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Decode quantiles", "type": "timeseries" }, { "cards": {}, "color": { "cardColor": "#5794F2", "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, "min": 0, "mode": "opacity" }, "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 11, "w": 12, "x": 12, "y": 61 }, "heatmap": {}, "hideZeroBuckets": false, "highlightCards": true, "id": 15, "legend": { "show": false }, "maxDataPoints": 25, "options": { "calculate": false, "calculation": {}, "cellGap": 2, "cellValues": {}, "color": { "exponent": 0.5, "fill": "#5794F2", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": false }, "rowsFrame": { "layout": "auto" }, "showValue": "never", "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "decimals": 1, "reverse": false, "unit": "s" } }, "pluginVersion": "10.4.2", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "exemplar": true, "expr": "sum(increase(tgi_batch_inference_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", "range": true, "refId": "A" } ], "title": "Decode Latency", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "yAxis": { "decimals": 1, "format": "s", "logBase": 1, "show": true }, "yBucketBound": "auto" }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 72 }, "id": 43, "panels": [], "title": "Debug", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 11, "w": 6, "x": 0, "y": 73 }, "id": 38, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Forward quantiles", "type": "timeseries" }, { "cards": {}, "color": { "cardColor": "#5794F2", "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, "min": 0, "mode": "opacity" }, "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 11, "w": 6, "x": 6, "y": 73 }, "heatmap": {}, "hideZeroBuckets": false, "highlightCards": true, "id": 35, "legend": { "show": false }, "maxDataPoints": 25, "options": { "calculate": false, "calculation": {}, "cellGap": 2, "cellValues": {}, "color": { "exponent": 0.5, "fill": "#5794F2", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": false }, "rowsFrame": { "layout": "auto" }, "showValue": "never", "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "decimals": 1, "reverse": false, "unit": "s" } }, "pluginVersion": "10.4.2", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "exemplar": true, "expr": "sum(increase(tgi_batch_forward_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", "range": true, "refId": "A" } ], "title": "Forward Latency", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "yAxis": { "decimals": 1, "format": "s", "logBase": 1, "show": true }, "yBucketBound": "auto" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 11, "w": 6, "x": 12, "y": 73 }, "id": 34, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Token Decode quantiles", "type": "timeseries" }, { "cards": {}, "color": { "cardColor": "#5794F2", "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, "min": 0, "mode": "opacity" }, "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 11, "w": 6, "x": 18, "y": 73 }, "heatmap": {}, "hideZeroBuckets": false, "highlightCards": true, "id": 40, "legend": { "show": false }, "maxDataPoints": 25, "options": { "calculate": false, "calculation": {}, "cellGap": 2, "cellValues": {}, "color": { "exponent": 0.5, "fill": "#5794F2", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": false }, "rowsFrame": { "layout": "auto" }, "showValue": "never", "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "decimals": 1, "reverse": false, "unit": "s" } }, "pluginVersion": "10.4.2", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "exemplar": true, "expr": "sum(increase(tgi_batch_decode_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", "range": true, "refId": "A" } ], "title": "Token Decode Latency", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "yAxis": { "decimals": 1, "format": "s", "logBase": 1, "show": true }, "yBucketBound": "auto" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 11, "w": 6, "x": 0, "y": 84 }, "id": 42, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Filter Batch quantiles", "type": "timeseries" }, { "cards": {}, "color": { "cardColor": "#5794F2", "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, "min": 0, "mode": "opacity" }, "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 11, "w": 6, "x": 6, "y": 84 }, "heatmap": {}, "hideZeroBuckets": false, "highlightCards": true, "id": 39, "legend": { "show": false }, "maxDataPoints": 25, "options": { "calculate": false, "calculation": {}, "cellGap": 2, "cellValues": {}, "color": { "exponent": 0.5, "fill": "#5794F2", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": false }, "rowsFrame": { "layout": "auto" }, "showValue": "never", "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "decimals": 1, "reverse": false, "unit": "s" } }, "pluginVersion": "10.4.2", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "exemplar": true, "expr": "sum(increase(tgi_batch_filter_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", "range": true, "refId": "A" } ], "title": "Filter Batch Latency", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "yAxis": { "decimals": 1, "format": "s", "logBase": 1, "show": true }, "yBucketBound": "auto" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "s" }, "overrides": [ { "matcher": { "id": "byName", "options": "p50" }, "properties": [ { "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p90" }, "properties": [ { "id": "color", "value": { "fixedColor": "orange", "mode": "fixed" } } ] }, { "matcher": { "id": "byName", "options": "p99" }, "properties": [ { "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } } ] } ] }, "gridPos": { "h": 11, "w": 6, "x": 12, "y": 84 }, "id": 36, "options": { "legend": { "calcs": [ "min", "max" ], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "single", "sort": "none" } }, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.5, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "legendFormat": "p50", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.9, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p90", "range": true, "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "expr": "histogram_quantile(0.99, sum by (le) (rate(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[10m])))", "hide": false, "legendFormat": "p99", "range": true, "refId": "C" } ], "title": "Batch Concat quantiles", "type": "timeseries" }, { "cards": {}, "color": { "cardColor": "#5794F2", "colorScale": "linear", "colorScheme": "interpolateSpectral", "exponent": 0.5, "min": 0, "mode": "opacity" }, "dataFormat": "tsbuckets", "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "fieldConfig": { "defaults": { "custom": { "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "scaleDistribution": { "type": "linear" } } }, "overrides": [] }, "gridPos": { "h": 11, "w": 6, "x": 18, "y": 84 }, "heatmap": {}, "hideZeroBuckets": false, "highlightCards": true, "id": 41, "legend": { "show": false }, "maxDataPoints": 25, "options": { "calculate": false, "calculation": {}, "cellGap": 2, "cellValues": {}, "color": { "exponent": 0.5, "fill": "#5794F2", "min": 0, "mode": "scheme", "reverse": false, "scale": "exponential", "scheme": "Spectral", "steps": 128 }, "exemplars": { "color": "rgba(255,0,255,0.7)" }, "filterValues": { "le": 1e-9 }, "legend": { "show": false }, "rowsFrame": { "layout": "auto" }, "showValue": "never", "tooltip": { "mode": "single", "showColorScale": false, "yHistogram": false }, "yAxis": { "axisPlacement": "left", "decimals": 1, "reverse": false, "unit": "s" } }, "pluginVersion": "10.4.2", "reverseYBuckets": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "editorMode": "code", "exemplar": true, "expr": "sum(increase(tgi_batch_concat_duration_bucket{method=\"decode\", container=\"$service\"}[5m])) by (le)", "format": "heatmap", "interval": "", "legendFormat": "{{ le }}", "range": true, "refId": "A" } ], "title": "Batch Concat latency", "tooltip": { "show": true, "showHistogram": false }, "type": "heatmap", "xAxis": { "show": true }, "yAxis": { "decimals": 1, "format": "s", "logBase": 1, "show": true }, "yBucketBound": "auto" } ], "refresh": "", "schemaVersion": 39, "tags": [], "templating": { "list": [ { "current": { "selected": false, "text": "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1", "value": "gpu-txt-gen-cohereforai-c4ai-command-r-plu-ba7f1" }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS_EKS API INFERENCE PROD}" }, "definition": "label_values(tgi_request_count, container)", "hide": 0, "includeAll": false, "multi": false, "name": "service", "options": [], "query": { "query": "label_values(tgi_request_count, container)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 1, "type": "query" } ] }, "time": { "from": "now-30m", "to": "now-30s" }, "timepicker": { "nowDelay": "30s" }, "timezone": "", "title": "Text Generation Inference", "uid": "RHSk7EL4kdqsd", "version": 12, "weekStart": "" }