Max token capacity metric (#2595)
* Add max_token_capacity_metric.
* Add `tgi` to the name of the metric.
* Add max capacity metric.
* Add description for the metrics.
---------
Co-authored-by: Edwinhr716 <Edandres249@gmail.com>
This commit is contained in:
parent
d18ed5cfc5
commit
0204946d26
|
@ -100,6 +100,7 @@ pub async fn connect_backend(
|
||||||
.map_err(V3Error::Warmup)?,
|
.map_err(V3Error::Warmup)?,
|
||||||
)?;
|
)?;
|
||||||
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
|
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
|
||||||
|
metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
|
||||||
|
|
||||||
let backend_info = BackendInfo {
|
let backend_info = BackendInfo {
|
||||||
waiting_served_ratio,
|
waiting_served_ratio,
|
||||||
|
|
|
@ -1937,6 +1937,11 @@ async fn start(
|
||||||
metrics::Unit::Count,
|
metrics::Unit::Count,
|
||||||
"Maximum tokens for the current batch"
|
"Maximum tokens for the current batch"
|
||||||
);
|
);
|
||||||
|
metrics::describe_gauge!(
|
||||||
|
"tgi_batch_total_tokens",
|
||||||
|
metrics::Unit::Count,
|
||||||
|
"Maximum amount of tokens in total."
|
||||||
|
);
|
||||||
metrics::describe_histogram!(
|
metrics::describe_histogram!(
|
||||||
"tgi_request_max_new_tokens",
|
"tgi_request_max_new_tokens",
|
||||||
metrics::Unit::Count,
|
metrics::Unit::Count,
|
||||||
|
|
Loading…
Reference in New Issue