refactor usage stats (#2339)
* refactor usage stats
* Update docs/source/usage_statistics.md
  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* Update router/src/server.rs
  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* changes based on feedback
* run python3 udpate_doc.py
* fix pre-commit
* Update router/src/server.rs
  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* delete option around usage stats arg

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent f7f61876cf
commit 7451041ecd
@@ -1,5 +1,5 @@
 use clap::{Parser, Subcommand};
-use text_generation_router::server;
+use text_generation_router::{server, usage_stats};
 use text_generation_router_v3::{connect_backend, V3Error};
 use thiserror::Error;

@@ -68,10 +68,8 @@ struct Args {
     disable_grammar_support: bool,
     #[clap(default_value = "4", long, env)]
     max_client_batch_size: usize,
-    #[clap(long, env, default_value_t)]
-    disable_usage_stats: bool,
-    #[clap(long, env, default_value_t)]
-    disable_crash_reports: bool,
+    #[clap(default_value = "on", long, env)]
+    usage_stats: usage_stats::UsageStatsLevel,
 }

 #[derive(Debug, Subcommand)]

@@ -114,9 +112,8 @@ async fn main() -> Result<(), RouterError> {
         ngrok_edge,
         messages_api_enabled,
         disable_grammar_support,
-        disable_usage_stats,
-        disable_crash_reports,
         max_client_batch_size,
+        usage_stats,
     } = args;

     if let Some(Commands::PrintSchema) = command {

@@ -188,8 +185,7 @@ async fn main() -> Result<(), RouterError> {
         messages_api_enabled,
         disable_grammar_support,
         max_client_batch_size,
-        disable_usage_stats,
-        disable_crash_reports,
+        usage_stats,
     )
     .await?;
     Ok(())

@@ -431,20 +431,18 @@ Options:
     [env: LORA_ADAPTERS=]

 ```
-## DISABLE_USAGE_STATS
+## USAGE_STATS
 ```shell
---disable-usage-stats
-    Disable sending of all usage statistics
+--usage-stats <USAGE_STATS>
+    Control if anonymous usage stats are collected. Options are "on", "off" and "no-stack". Default is on

-    [env: DISABLE_USAGE_STATS=]
+    [env: USAGE_STATS=]
+    [default: on]

-```
-## DISABLE_CRASH_REPORTS
-```shell
---disable-crash-reports
-    Disable sending of crash reports, but allow anonymous usage statistics
-
-    [env: DISABLE_CRASH_REPORTS=]
+    Possible values:
+    - on:       Default option, usage statistics are collected anonymously
+    - off:      Disables all collection of usage statistics
+    - no-stack: Doesn't send the error stack trace or error type, but allows sending a crash event

 ```
 ## HELP

@@ -70,4 +70,6 @@ As of release 2.1.2 this is an example of the data collected:

 ## How to opt-out

-You can easily opt out by passing the `--disable-usage-stats` to the text-generation-launcher command. This will disable all usage statistics. You can also pass `--disable-crash-reports` which disables sending specific crash reports, but allows anonymous usage statistics.
+By passing the `--usage-stats` flag to the text-generation-launcher you can control how much usage statistics are collected.
+`--usage-stats=no-stack` will not emit stack traces from errors or the error types, but will continue to send start and stop events.
+`--usage-stats=off` will completely disable the collection of usage statistics.

@@ -168,6 +168,33 @@ impl std::fmt::Display for RopeScaling {
     }
 }

+#[derive(Clone, Copy, Debug, ValueEnum)]
+pub enum UsageStatsLevel {
+    /// Default option, usage statistics are collected anonymously
+    On,
+    /// Disables all collection of usage statistics
+    Off,
+    /// Doesn't send the error stack trace or error type, but allows sending a crash event
+    NoStack,
+}
+
+impl std::fmt::Display for UsageStatsLevel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // To keep in sync with `server`.
+        match self {
+            UsageStatsLevel::On => {
+                write!(f, "on")
+            }
+            UsageStatsLevel::Off => {
+                write!(f, "off")
+            }
+            UsageStatsLevel::NoStack => {
+                write!(f, "no-stack")
+            }
+        }
+    }
+}
+
 /// App Configuration
 #[derive(Parser, Debug)]
 #[clap(author, version, about, long_about = None)]

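The `Display` strings written here end up on the router command line, because the launcher forwards `args.usage_stats.to_string()` in `spawn_webserver` (see the hunk below), so they have to match the value names clap derives for the router-side `UsageStatsLevel`. A minimal round-trip sketch, separate from this diff and assuming clap with the `derive` feature enabled:

```rust
// Illustrative sketch (not part of this commit): `#[derive(ValueEnum)]`
// gives each variant a kebab-case value name, so the strings emitted by
// the launcher's `Display` impl ("on", "off", "no-stack") parse back into
// the router's enum unchanged.
use clap::ValueEnum;

#[derive(Clone, Copy, Debug, PartialEq, ValueEnum)]
enum UsageStatsLevel {
    On,
    Off,
    NoStack,
}

fn main() {
    // clap derives the value name "no-stack" for the `NoStack` variant.
    let parsed = UsageStatsLevel::from_str("no-stack", true).expect("valid value");
    assert_eq!(parsed, UsageStatsLevel::NoStack);

    // Round-tripping through the derived name mirrors what
    // `args.usage_stats.to_string()` hands to the router process.
    let name = parsed
        .to_possible_value()
        .expect("has a value")
        .get_name()
        .to_string();
    assert_eq!(name, "no-stack");
}
```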
@@ -466,13 +493,11 @@ struct Args {
     #[clap(long, env)]
     lora_adapters: Option<String>,

-    /// Disable sending of all usage statistics
-    #[clap(default_value = "false", long, env)]
-    disable_usage_stats: bool,
-
-    /// Disable sending of crash reports, but allow anonymous usage statistics
-    #[clap(default_value = "false", long, env)]
-    disable_crash_reports: bool,
+    /// Control if anonymous usage stats are collected.
+    /// Options are "on", "off" and "no-stack".
+    /// Default is on.
+    #[clap(default_value = "on", long, env)]
+    usage_stats: UsageStatsLevel,
 }

 #[derive(Debug)]

@@ -1218,12 +1243,8 @@ fn spawn_webserver(
     ];

     // Pass usage stats flags to router
-    if args.disable_usage_stats {
-        router_args.push("--disable-usage-stats".to_string());
-    }
-    if args.disable_crash_reports {
-        router_args.push("--disable-crash-reports".to_string());
-    }
+    router_args.push("--usage-stats".to_string());
+    router_args.push(args.usage_stats.to_string());

     // Grammar support
     if args.disable_grammar_support {

@@ -7,14 +7,13 @@ use crate::kserve::{
     kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer,
     kserve_model_metadata, kserve_model_metadata_ready,
 };
-use crate::usage_stats;
 use crate::validation::ValidationError;
 use crate::{
-    BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters,
-    GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, HubProcessorConfig,
-    HubTokenizerConfig, Info, Message, MessageChunk, MessageContent, OutputMessage, PrefillToken,
-    SimpleToken, StreamDetails, StreamResponse, TextMessage, Token, TokenizeResponse,
-    ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
+    usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
+    GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
+    HubProcessorConfig, HubTokenizerConfig, Info, Message, MessageChunk, MessageContent,
+    OutputMessage, PrefillToken, SimpleToken, StreamDetails, StreamResponse, TextMessage, Token,
+    TokenizeResponse, ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
 };
 use crate::{
     ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete,

@@ -1505,8 +1504,7 @@ pub async fn run(
     messages_api_enabled: bool,
     disable_grammar_support: bool,
     max_client_batch_size: usize,
-    disable_usage_stats: bool,
-    disable_crash_reports: bool,
+    usage_stats_level: usage_stats::UsageStatsLevel,
 ) -> Result<(), WebServerError> {
     // CORS allowed origins
     // map to go inside the option and then map to parse from String to HeaderValue

@@ -1698,33 +1696,32 @@ pub async fn run(

     // Only send usage stats when TGI is run in container and the function returns Some
     let is_container = matches!(usage_stats::is_container(), Ok(true));
-
-    let user_agent = if !disable_usage_stats && is_container {
-        let reduced_args = usage_stats::Args::new(
-            config.clone(),
-            tokenizer_config.tokenizer_class.clone(),
-            max_concurrent_requests,
-            max_best_of,
-            max_stop_sequences,
-            max_top_n_tokens,
-            max_input_tokens,
-            max_total_tokens,
-            // waiting_served_ratio,
-            // max_batch_prefill_tokens,
-            // max_batch_total_tokens,
-            // max_waiting_tokens,
-            // max_batch_size,
-            revision.clone(),
-            validation_workers,
-            messages_api_enabled,
-            disable_grammar_support,
-            max_client_batch_size,
-            disable_usage_stats,
-            disable_crash_reports,
-        );
-        Some(usage_stats::UserAgent::new(reduced_args))
-    } else {
-        None
+    let user_agent = match (usage_stats_level, is_container) {
+        (usage_stats::UsageStatsLevel::On | usage_stats::UsageStatsLevel::NoStack, true) => {
+            let reduced_args = usage_stats::Args::new(
+                config.clone(),
+                tokenizer_config.tokenizer_class.clone(),
+                max_concurrent_requests,
+                max_best_of,
+                max_stop_sequences,
+                max_top_n_tokens,
+                max_input_tokens,
+                max_total_tokens,
+                // waiting_served_ratio,
+                // max_batch_prefill_tokens,
+                // max_batch_total_tokens,
+                // max_waiting_tokens,
+                // max_batch_size,
+                revision.clone(),
+                validation_workers,
+                messages_api_enabled,
+                disable_grammar_support,
+                max_client_batch_size,
+                usage_stats_level,
+            );
+            Some(usage_stats::UserAgent::new(reduced_args))
+        }
+        _ => None,
     };

     if let Some(ref ua) = user_agent {

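Stripped of the surrounding server setup, the new `match` reduces to a small predicate: a telemetry user agent is only created inside a container and only when the level is not `Off`. A standalone sketch of that decision, separate from the actual server code:

```rust
// Illustrative sketch (not the server code): mirrors the (level, is_container)
// match used when deciding whether to build a usage-stats UserAgent.
#[derive(Clone, Copy)]
enum UsageStatsLevel {
    On,
    Off,
    NoStack,
}

fn telemetry_enabled(level: UsageStatsLevel, is_container: bool) -> bool {
    matches!(
        (level, is_container),
        (UsageStatsLevel::On | UsageStatsLevel::NoStack, true)
    )
}

fn main() {
    assert!(telemetry_enabled(UsageStatsLevel::NoStack, true));
    assert!(!telemetry_enabled(UsageStatsLevel::On, false)); // not running in a container
    assert!(!telemetry_enabled(UsageStatsLevel::Off, true)); // user opted out
}
```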
@@ -1780,21 +1777,18 @@ pub async fn run(
                Ok(())
            }
            Err(e) => {
-               if !disable_crash_reports {
-                   let error_event = usage_stats::UsageStatsEvent::new(
-                       ua.clone(),
-                       usage_stats::EventType::Error,
-                       Some(e.to_string()),
-                   );
-                   error_event.send().await;
-               } else {
-                   let unknow_error_event = usage_stats::UsageStatsEvent::new(
-                       ua.clone(),
-                       usage_stats::EventType::Error,
-                       Some("unknow_error".to_string()),
-                   );
-                   unknow_error_event.send().await;
-               }
+               let description = match usage_stats_level {
+                   usage_stats::UsageStatsLevel::On => Some(e.to_string()),
+                   usage_stats::UsageStatsLevel::NoStack => Some("unknow_error".to_string()),
+                   _ => None,
+               };
+               let event = usage_stats::UsageStatsEvent::new(
+                   ua.clone(),
+                   usage_stats::EventType::Error,
+                   description,
+               );
+               event.send().await;
+
                Err(e)
            }
        }

@@ -1,4 +1,5 @@
 use crate::config::Config;
+use clap::ValueEnum;
 use csv::ReaderBuilder;
 use reqwest::header::HeaderMap;
 use serde::Serialize;

@@ -13,6 +14,13 @@ use uuid::Uuid;

 const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi";

+#[derive(Copy, Clone, Debug, Serialize, ValueEnum)]
+pub enum UsageStatsLevel {
+    On,
+    NoStack,
+    Off,
+}
+
 #[derive(Debug, Clone, Serialize)]
 pub struct UserAgent {
     pub uid: String,

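Because the new `usage_stats_level` field lives in the `Serialize`d `Args` struct below, the chosen level rides along in the telemetry payload. A standalone sketch, not part of the commit and assuming the `serde` and `serde_json` crates, of how such a fieldless variant serializes:

```rust
// Standalone sketch (not in the diff): a plain `#[derive(Serialize)]` on a
// fieldless enum serializes each variant as its name, so the level appears
// in the reported args as e.g. "NoStack".
use serde::Serialize;

#[allow(dead_code)]
#[derive(Copy, Clone, Debug, Serialize)]
enum UsageStatsLevel {
    On,
    NoStack,
    Off,
}

fn main() -> Result<(), serde_json::Error> {
    let json = serde_json::to_string(&UsageStatsLevel::NoStack)?;
    assert_eq!(json, r#""NoStack""#);
    println!("{json}");
    Ok(())
}
```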
@@ -71,7 +79,7 @@ impl UsageStatsEvent {
 #[derive(Debug, Clone, Serialize)]
 pub struct Args {
     model_config: Option<Config>,
-    tokenizer_config: Option<String>,
+    tokenizer_class: Option<String>,
     max_concurrent_requests: usize,
     max_best_of: usize,
     max_stop_sequences: usize,

@@ -88,15 +96,14 @@ pub struct Args {
     messages_api_enabled: bool,
     disable_grammar_support: bool,
     max_client_batch_size: usize,
-    disable_usage_stats: bool,
-    disable_crash_reports: bool,
+    usage_stats_level: UsageStatsLevel,
 }

 impl Args {
     #[allow(clippy::too_many_arguments)]
     pub fn new(
         model_config: Option<Config>,
-        tokenizer_config: Option<String>,
+        tokenizer_class: Option<String>,
         max_concurrent_requests: usize,
         max_best_of: usize,
         max_stop_sequences: usize,

@@ -113,12 +120,11 @@ impl Args {
         messages_api_enabled: bool,
         disable_grammar_support: bool,
         max_client_batch_size: usize,
-        disable_usage_stats: bool,
-        disable_crash_reports: bool,
+        usage_stats_level: UsageStatsLevel,
     ) -> Self {
         Self {
             model_config,
-            tokenizer_config,
+            tokenizer_class,
             max_concurrent_requests,
             max_best_of,
             max_stop_sequences,

@@ -135,8 +141,7 @@ impl Args {
             messages_api_enabled,
             disable_grammar_support,
             max_client_batch_size,
-            disable_usage_stats,
-            disable_crash_reports,
+            usage_stats_level,
         }
     }
 }