refactor usage stats (#2339)

* refactor usage stats

* Update docs/source/usage_statistics.md

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* Update router/src/server.rs

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* changes based on feedback

* run python3 udpate_doc.py

* fix pre-commit

* Update router/src/server.rs

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>

* delete option around usage stats arg

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
Erik Kaunismäki 2024-07-31 16:29:07 +02:00 committed by GitHub
parent f7f61876cf
commit 7451041ecd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 109 additions and 93 deletions

View File

@ -1,5 +1,5 @@
use clap::{Parser, Subcommand};
use text_generation_router::server;
use text_generation_router::{server, usage_stats};
use text_generation_router_v3::{connect_backend, V3Error};
use thiserror::Error;
@ -68,10 +68,8 @@ struct Args {
disable_grammar_support: bool,
#[clap(default_value = "4", long, env)]
max_client_batch_size: usize,
#[clap(long, env, default_value_t)]
disable_usage_stats: bool,
#[clap(long, env, default_value_t)]
disable_crash_reports: bool,
#[clap(default_value = "on", long, env)]
usage_stats: usage_stats::UsageStatsLevel,
}
#[derive(Debug, Subcommand)]
@ -114,9 +112,8 @@ async fn main() -> Result<(), RouterError> {
ngrok_edge,
messages_api_enabled,
disable_grammar_support,
disable_usage_stats,
disable_crash_reports,
max_client_batch_size,
usage_stats,
} = args;
if let Some(Commands::PrintSchema) = command {
@ -188,8 +185,7 @@ async fn main() -> Result<(), RouterError> {
messages_api_enabled,
disable_grammar_support,
max_client_batch_size,
disable_usage_stats,
disable_crash_reports,
usage_stats,
)
.await?;
Ok(())

View File

@ -431,20 +431,18 @@ Options:
[env: LORA_ADAPTERS=]
```
## DISABLE_USAGE_STATS
## USAGE_STATS
```shell
--disable-usage-stats
Disable sending of all usage statistics
--usage-stats <USAGE_STATS>
Control if anonymous usage stats are collected. Options are "on", "off" and "no-stack". Default is on
[env: DISABLE_USAGE_STATS=]
[env: USAGE_STATS=]
[default: on]
```
## DISABLE_CRASH_REPORTS
```shell
--disable-crash-reports
Disable sending of crash reports, but allow anonymous usage statistics
[env: DISABLE_CRASH_REPORTS=]
Possible values:
- on: Default option, usage statistics are collected anonymously
- off: Disables all collection of usage statistics
- no-stack: Doesn't send the error stack trace or error type, but allows sending a crash event
```
## HELP

View File

@ -70,4 +70,6 @@ As of release 2.1.2 this is an example of the data collected:
## How to opt-out
You can easily opt out by passing the `--disable-usage-stats` to the text-generation-launcher command. This will disable all usage statistics. You can also pass `--disable-crash-reports` which disables sending specific crash reports, but allows anonymous usage statistics.
By passing the `--usage-stats` flag to the text-generation-launcher, you can control how many usage statistics are collected.
`--usage-stats=no-stack` will not send the stack traces or the error types of errors, but will continue to send start and stop events.
`--usage-stats=off` will completely disable the collection of usage statistics.

View File

@ -168,6 +168,33 @@ impl std::fmt::Display for RopeScaling {
}
}
// Level of anonymous usage-statistics reporting, selected with the launcher's
// `--usage-stats` argument (default "on").
// The `Display` impl for this type writes "on", "off" and "no-stack" for the
// three variants, and that text is what gets forwarded to the router.
// NOTE(review): the variant doc comments below show up verbatim as the
// per-value descriptions in the generated CLI docs, so treat them as
// user-facing text when editing.
#[derive(Clone, Copy, Debug, ValueEnum)]
pub enum UsageStatsLevel {
/// Default option, usage statistics are collected anonymously
On,
/// Disables all collection of usage statistics
Off,
/// Doesn't send the error stack trace or error type, but allows sending a crash event
NoStack,
}
impl std::fmt::Display for UsageStatsLevel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// To keep in track with `server`.
match self {
UsageStatsLevel::On => {
write!(f, "on")
}
UsageStatsLevel::Off => {
write!(f, "off")
}
UsageStatsLevel::NoStack => {
write!(f, "no-stack")
}
}
}
}
/// App Configuration
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
@ -466,13 +493,11 @@ struct Args {
#[clap(long, env)]
lora_adapters: Option<String>,
/// Disable sending of all usage statistics
#[clap(default_value = "false", long, env)]
disable_usage_stats: bool,
/// Disable sending of crash reports, but allow anonymous usage statistics
#[clap(default_value = "false", long, env)]
disable_crash_reports: bool,
/// Control if anonymous usage stats are collected.
/// Options are "on", "off" and "no-stack".
/// Default is on.
#[clap(default_value = "on", long, env)]
usage_stats: UsageStatsLevel,
}
#[derive(Debug)]
@ -1218,12 +1243,8 @@ fn spawn_webserver(
];
// Pass usage stats flags to router
if args.disable_usage_stats {
router_args.push("--disable-usage-stats".to_string());
}
if args.disable_crash_reports {
router_args.push("--disable-crash-reports".to_string());
}
router_args.push("--usage-stats".to_string());
router_args.push(args.usage_stats.to_string());
// Grammar support
if args.disable_grammar_support {

View File

@ -7,14 +7,13 @@ use crate::kserve::{
kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer,
kserve_model_metadata, kserve_model_metadata_ready,
};
use crate::usage_stats;
use crate::validation::ValidationError;
use crate::{
BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters,
GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, HubProcessorConfig,
HubTokenizerConfig, Info, Message, MessageChunk, MessageContent, OutputMessage, PrefillToken,
SimpleToken, StreamDetails, StreamResponse, TextMessage, Token, TokenizeResponse,
ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
HubProcessorConfig, HubTokenizerConfig, Info, Message, MessageChunk, MessageContent,
OutputMessage, PrefillToken, SimpleToken, StreamDetails, StreamResponse, TextMessage, Token,
TokenizeResponse, ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
};
use crate::{
ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete,
@ -1505,8 +1504,7 @@ pub async fn run(
messages_api_enabled: bool,
disable_grammar_support: bool,
max_client_batch_size: usize,
disable_usage_stats: bool,
disable_crash_reports: bool,
usage_stats_level: usage_stats::UsageStatsLevel,
) -> Result<(), WebServerError> {
// CORS allowed origins
// map to go inside the option and then map to parse from String to HeaderValue
@ -1698,33 +1696,32 @@ pub async fn run(
// Only send usage stats when TGI is run in container and the function returns Some
let is_container = matches!(usage_stats::is_container(), Ok(true));
let user_agent = if !disable_usage_stats && is_container {
let reduced_args = usage_stats::Args::new(
config.clone(),
tokenizer_config.tokenizer_class.clone(),
max_concurrent_requests,
max_best_of,
max_stop_sequences,
max_top_n_tokens,
max_input_tokens,
max_total_tokens,
// waiting_served_ratio,
// max_batch_prefill_tokens,
// max_batch_total_tokens,
// max_waiting_tokens,
// max_batch_size,
revision.clone(),
validation_workers,
messages_api_enabled,
disable_grammar_support,
max_client_batch_size,
disable_usage_stats,
disable_crash_reports,
);
Some(usage_stats::UserAgent::new(reduced_args))
} else {
None
let user_agent = match (usage_stats_level, is_container) {
(usage_stats::UsageStatsLevel::On | usage_stats::UsageStatsLevel::NoStack, true) => {
let reduced_args = usage_stats::Args::new(
config.clone(),
tokenizer_config.tokenizer_class.clone(),
max_concurrent_requests,
max_best_of,
max_stop_sequences,
max_top_n_tokens,
max_input_tokens,
max_total_tokens,
// waiting_served_ratio,
// max_batch_prefill_tokens,
// max_batch_total_tokens,
// max_waiting_tokens,
// max_batch_size,
revision.clone(),
validation_workers,
messages_api_enabled,
disable_grammar_support,
max_client_batch_size,
usage_stats_level,
);
Some(usage_stats::UserAgent::new(reduced_args))
}
_ => None,
};
if let Some(ref ua) = user_agent {
@ -1780,21 +1777,18 @@ pub async fn run(
Ok(())
}
Err(e) => {
if !disable_crash_reports {
let error_event = usage_stats::UsageStatsEvent::new(
ua.clone(),
usage_stats::EventType::Error,
Some(e.to_string()),
);
error_event.send().await;
} else {
let unknow_error_event = usage_stats::UsageStatsEvent::new(
ua.clone(),
usage_stats::EventType::Error,
Some("unknow_error".to_string()),
);
unknow_error_event.send().await;
}
let description = match usage_stats_level {
usage_stats::UsageStatsLevel::On => Some(e.to_string()),
usage_stats::UsageStatsLevel::NoStack => Some("unknow_error".to_string()),
_ => None,
};
let event = usage_stats::UsageStatsEvent::new(
ua.clone(),
usage_stats::EventType::Error,
description,
);
event.send().await;
Err(e)
}
}

View File

@ -1,4 +1,5 @@
use crate::config::Config;
use clap::ValueEnum;
use csv::ReaderBuilder;
use reqwest::header::HeaderMap;
use serde::Serialize;
@ -13,6 +14,13 @@ use uuid::Uuid;
const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi";
// How much anonymous usage data the router may report; received through the
// `--usage-stats` argument (default "on").
// NOTE(review): plain `//` comments are used on purpose. Doc comments on the
// variants of a `ValueEnum` presumably become CLI help text, which would
// change the router's `--help` output — confirm before converting to `///`.
#[derive(Copy, Clone, Debug, Serialize, ValueEnum)]
pub enum UsageStatsLevel {
// A user agent is built (when running in a container) and error events carry
// the error's `to_string()` text.
On,
// A user agent is still built, but error events carry the fixed placeholder
// description instead of the real error text.
NoStack,
// No user agent is built, so no usage events are sent.
Off,
}
#[derive(Debug, Clone, Serialize)]
pub struct UserAgent {
pub uid: String,
@ -71,7 +79,7 @@ impl UsageStatsEvent {
#[derive(Debug, Clone, Serialize)]
pub struct Args {
model_config: Option<Config>,
tokenizer_config: Option<String>,
tokenizer_class: Option<String>,
max_concurrent_requests: usize,
max_best_of: usize,
max_stop_sequences: usize,
@ -88,15 +96,14 @@ pub struct Args {
messages_api_enabled: bool,
disable_grammar_support: bool,
max_client_batch_size: usize,
disable_usage_stats: bool,
disable_crash_reports: bool,
usage_stats_level: UsageStatsLevel,
}
impl Args {
#[allow(clippy::too_many_arguments)]
pub fn new(
model_config: Option<Config>,
tokenizer_config: Option<String>,
tokenizer_class: Option<String>,
max_concurrent_requests: usize,
max_best_of: usize,
max_stop_sequences: usize,
@ -113,12 +120,11 @@ impl Args {
messages_api_enabled: bool,
disable_grammar_support: bool,
max_client_batch_size: usize,
disable_usage_stats: bool,
disable_crash_reports: bool,
usage_stats_level: UsageStatsLevel,
) -> Self {
Self {
model_config,
tokenizer_config,
tokenizer_class,
max_concurrent_requests,
max_best_of,
max_stop_sequences,
@ -135,8 +141,7 @@ impl Args {
messages_api_enabled,
disable_grammar_support,
max_client_batch_size,
disable_usage_stats,
disable_crash_reports,
usage_stats_level,
}
}
}