Max token capacity metric (#2595)

* adding max_token_capacity_metric

* added tgi to name of metric

* Adding max capacity metric.

* Add description for the metrics

---------

Co-authored-by: Edwinhr716 <Edandres249@gmail.com>
This commit is contained in:
Nicolas Patry 2024-10-02 16:32:36 +02:00 committed by GitHub
parent d18ed5cfc5
commit 0204946d26
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 0 deletions

View File

@ -100,6 +100,7 @@ pub async fn connect_backend(
.map_err(V3Error::Warmup)?,
)?;
tracing::info!("Setting max batch total tokens to {max_batch_total_tokens}");
metrics::gauge!("tgi_batch_max_total_tokens").set(max_batch_total_tokens);
let backend_info = BackendInfo {
waiting_served_ratio,

View File

@ -1937,6 +1937,11 @@ async fn start(
metrics::Unit::Count,
"Maximum tokens for the current batch"
);
metrics::describe_gauge!(
"tgi_batch_total_tokens",
metrics::Unit::Count,
"Maximum amount of tokens in total."
);
metrics::describe_histogram!(
"tgi_request_max_new_tokens",
metrics::Unit::Count,