Max token capacity metric (#2595)
* Adding max_token_capacity_metric
* Added tgi to the name of the metric
* Adding max capacity metric
* Add description for the metrics

---------

Co-authored-by: Edwinhr716 <Edandres249@gmail.com>
This commit is contained in:
parent
d18ed5cfc5
commit
0204946d26
|
@ -100,6 +100,7 @@ pub async fn connect_backend(
|
|||
.map_err(V3Error::Warmup)?,
|
||||
)?;
|
||||
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
|
||||
metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
|
||||
|
||||
let backend_info = BackendInfo {
|
||||
waiting_served_ratio,
|
||||
|
|
|
@ -1937,6 +1937,11 @@ async fn start(
|
|||
metrics::Unit::Count,
|
||||
"Maximum tokens for the current batch"
|
||||
);
|
||||
metrics::describe_gauge!(
|
||||
"tgi_batch_total_tokens",
|
||||
metrics::Unit::Count,
|
||||
"Maximum amount of tokens in total."
|
||||
);
|
||||
metrics::describe_histogram!(
|
||||
"tgi_request_max_new_tokens",
|
||||
metrics::Unit::Count,
|
||||
|
|
Loading…
Reference in New Issue