refactor usage stats (#2339)
* refactor usage stats
* Update docs/source/usage_statistics.md
  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* Update router/src/server.rs
  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* changes based on feedback
* run python3 udpate_doc.py
* fix pre-commit
* Update router/src/server.rs
  Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
* delete option around usage stats arg

---------

Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
This commit is contained in:
parent f7f61876cf
commit 7451041ecd
@@ -1,5 +1,5 @@
 use clap::{Parser, Subcommand};
-use text_generation_router::server;
+use text_generation_router::{server, usage_stats};
 use text_generation_router_v3::{connect_backend, V3Error};
 use thiserror::Error;

@@ -68,10 +68,8 @@ struct Args {
     disable_grammar_support: bool,
     #[clap(default_value = "4", long, env)]
     max_client_batch_size: usize,
-    #[clap(long, env, default_value_t)]
-    disable_usage_stats: bool,
-    #[clap(long, env, default_value_t)]
-    disable_crash_reports: bool,
+    #[clap(default_value = "on", long, env)]
+    usage_stats: usage_stats::UsageStatsLevel,
 }

 #[derive(Debug, Subcommand)]

@@ -114,9 +112,8 @@ async fn main() -> Result<(), RouterError> {
         ngrok_edge,
         messages_api_enabled,
         disable_grammar_support,
-        disable_usage_stats,
-        disable_crash_reports,
         max_client_batch_size,
+        usage_stats,
     } = args;

     if let Some(Commands::PrintSchema) = command {

@@ -188,8 +185,7 @@ async fn main() -> Result<(), RouterError> {
         messages_api_enabled,
         disable_grammar_support,
         max_client_batch_size,
-        disable_usage_stats,
-        disable_crash_reports,
+        usage_stats,
     )
     .await?;
     Ok(())

@@ -431,20 +431,18 @@ Options:
     [env: LORA_ADAPTERS=]

 ```
-## DISABLE_USAGE_STATS
+## USAGE_STATS
 ```shell
---disable-usage-stats
-    Disable sending of all usage statistics
+--usage-stats <USAGE_STATS>
+    Control if anonymous usage stats are collected. Options are "on", "off" and "no-stack". Default is on

-    [env: DISABLE_USAGE_STATS=]
+    [env: USAGE_STATS=]
+    [default: on]

-```
-## DISABLE_CRASH_REPORTS
-```shell
---disable-crash-reports
-    Disable sending of crash reports, but allow anonymous usage statistics
-
-    [env: DISABLE_CRASH_REPORTS=]
+    Possible values:
+    - on:       Default option, usage statistics are collected anonymously
+    - off:      Disables all collection of usage statistics
+    - no-stack: Doesn't send the error stack trace or error type, but allows sending a crash event

 ```
 ## HELP

@@ -70,4 +70,6 @@ As of release 2.1.2 this is an example of the data collected:

 ## How to opt-out

-You can easily opt out by passing the `--disable-usage-stats` to the text-generation-launcher command. This will disable all usage statistics. You can also pass `--disable-crash-reports` which disables sending specific crash reports, but allows anonymous usage statistics.
+By passing the `--usage-stats` flag to the text-generation-launcher you can control how much usage statistics are collected.
+`--usage-stats=no-stack` will not emit stack traces from errors or the error types, but will continue to send start and stop events.
+`--usage-stats=off` will completely disable the collection of usage statistics.

@@ -168,6 +168,33 @@ impl std::fmt::Display for RopeScaling {
     }
 }

+#[derive(Clone, Copy, Debug, ValueEnum)]
+pub enum UsageStatsLevel {
+    /// Default option, usage statistics are collected anonymously
+    On,
+    /// Disables all collection of usage statistics
+    Off,
+    /// Doesn't send the error stack trace or error type, but allows sending a crash event
+    NoStack,
+}
+
+impl std::fmt::Display for UsageStatsLevel {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // To keep in sync with `server`.
+        match self {
+            UsageStatsLevel::On => {
+                write!(f, "on")
+            }
+            UsageStatsLevel::Off => {
+                write!(f, "off")
+            }
+            UsageStatsLevel::NoStack => {
+                write!(f, "no-stack")
+            }
+        }
+    }
+}
+
 /// App Configuration
 #[derive(Parser, Debug)]
 #[clap(author, version, about, long_about = None)]

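The `Display` strings written here end up on the router command line, because the launcher forwards `args.usage_stats.to_string()` in `spawn_webserver` (see the hunk below), so they have to match the value names clap derives for the router-side `UsageStatsLevel`. A minimal round-trip sketch, separate from this diff and assuming clap with the `derive` feature enabled:

```rust
// Illustrative sketch (not part of this commit): `#[derive(ValueEnum)]`
// gives each variant a kebab-case value name, so the strings emitted by
// the launcher's `Display` impl ("on", "off", "no-stack") parse back into
// the router's enum unchanged.
use clap::ValueEnum;

#[derive(Clone, Copy, Debug, PartialEq, ValueEnum)]
enum UsageStatsLevel {
    On,
    Off,
    NoStack,
}

fn main() {
    // clap derives the value name "no-stack" for the `NoStack` variant.
    let parsed = UsageStatsLevel::from_str("no-stack", true).expect("valid value");
    assert_eq!(parsed, UsageStatsLevel::NoStack);

    // Round-tripping through the derived name mirrors what
    // `args.usage_stats.to_string()` hands to the router process.
    let name = parsed
        .to_possible_value()
        .expect("has a value")
        .get_name()
        .to_string();
    assert_eq!(name, "no-stack");
}
```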
@@ -466,13 +493,11 @@ struct Args {
     #[clap(long, env)]
     lora_adapters: Option<String>,

-    /// Disable sending of all usage statistics
-    #[clap(default_value = "false", long, env)]
-    disable_usage_stats: bool,
-
-    /// Disable sending of crash reports, but allow anonymous usage statistics
-    #[clap(default_value = "false", long, env)]
-    disable_crash_reports: bool,
+    /// Control if anonymous usage stats are collected.
+    /// Options are "on", "off" and "no-stack".
+    /// Default is on.
+    #[clap(default_value = "on", long, env)]
+    usage_stats: UsageStatsLevel,
 }

 #[derive(Debug)]

@@ -1218,12 +1243,8 @@ fn spawn_webserver(
     ];

     // Pass usage stats flags to router
-    if args.disable_usage_stats {
-        router_args.push("--disable-usage-stats".to_string());
-    }
-    if args.disable_crash_reports {
-        router_args.push("--disable-crash-reports".to_string());
-    }
+    router_args.push("--usage-stats".to_string());
+    router_args.push(args.usage_stats.to_string());

     // Grammar support
     if args.disable_grammar_support {

@@ -7,14 +7,13 @@ use crate::kserve::{
     kerve_server_metadata, kserve_health_live, kserve_health_ready, kserve_model_infer,
     kserve_model_metadata, kserve_model_metadata_ready,
 };
-use crate::usage_stats;
 use crate::validation::ValidationError;
 use crate::{
-    BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName, GenerateParameters,
-    GenerateRequest, GenerateResponse, GrammarType, HubModelInfo, HubProcessorConfig,
-    HubTokenizerConfig, Info, Message, MessageChunk, MessageContent, OutputMessage, PrefillToken,
-    SimpleToken, StreamDetails, StreamResponse, TextMessage, Token, TokenizeResponse,
-    ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
+    usage_stats, BestOfSequence, Details, ErrorResponse, FinishReason, FunctionName,
+    GenerateParameters, GenerateRequest, GenerateResponse, GrammarType, HubModelInfo,
+    HubProcessorConfig, HubTokenizerConfig, Info, Message, MessageChunk, MessageContent,
+    OutputMessage, PrefillToken, SimpleToken, StreamDetails, StreamResponse, TextMessage, Token,
+    TokenizeResponse, ToolCallDelta, ToolCallMessage, Url, Usage, Validation,
 };
 use crate::{
     ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete,

@@ -1505,8 +1504,7 @@ pub async fn run(
     messages_api_enabled: bool,
     disable_grammar_support: bool,
     max_client_batch_size: usize,
-    disable_usage_stats: bool,
-    disable_crash_reports: bool,
+    usage_stats_level: usage_stats::UsageStatsLevel,
 ) -> Result<(), WebServerError> {
     // CORS allowed origins
     // map to go inside the option and then map to parse from String to HeaderValue

@@ -1698,33 +1696,32 @@ pub async fn run(

     // Only send usage stats when TGI is run in container and the function returns Some
     let is_container = matches!(usage_stats::is_container(), Ok(true));
-
-    let user_agent = if !disable_usage_stats && is_container {
-        let reduced_args = usage_stats::Args::new(
-            config.clone(),
-            tokenizer_config.tokenizer_class.clone(),
-            max_concurrent_requests,
-            max_best_of,
-            max_stop_sequences,
-            max_top_n_tokens,
-            max_input_tokens,
-            max_total_tokens,
-            // waiting_served_ratio,
-            // max_batch_prefill_tokens,
-            // max_batch_total_tokens,
-            // max_waiting_tokens,
-            // max_batch_size,
-            revision.clone(),
-            validation_workers,
-            messages_api_enabled,
-            disable_grammar_support,
-            max_client_batch_size,
-            disable_usage_stats,
-            disable_crash_reports,
-        );
-        Some(usage_stats::UserAgent::new(reduced_args))
-    } else {
-        None
+    let user_agent = match (usage_stats_level, is_container) {
+        (usage_stats::UsageStatsLevel::On | usage_stats::UsageStatsLevel::NoStack, true) => {
+            let reduced_args = usage_stats::Args::new(
+                config.clone(),
+                tokenizer_config.tokenizer_class.clone(),
+                max_concurrent_requests,
+                max_best_of,
+                max_stop_sequences,
+                max_top_n_tokens,
+                max_input_tokens,
+                max_total_tokens,
+                // waiting_served_ratio,
+                // max_batch_prefill_tokens,
+                // max_batch_total_tokens,
+                // max_waiting_tokens,
+                // max_batch_size,
+                revision.clone(),
+                validation_workers,
+                messages_api_enabled,
+                disable_grammar_support,
+                max_client_batch_size,
+                usage_stats_level,
+            );
+            Some(usage_stats::UserAgent::new(reduced_args))
+        }
+        _ => None,
     };

     if let Some(ref ua) = user_agent {

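Stripped of the surrounding server setup, the new `match` reduces to a small predicate: a telemetry user agent is only created inside a container and only when the level is not `Off`. A standalone sketch of that decision, separate from the actual server code:

```rust
// Illustrative sketch (not the server code): mirrors the (level, is_container)
// match used when deciding whether to build a usage-stats UserAgent.
#[derive(Clone, Copy)]
enum UsageStatsLevel {
    On,
    Off,
    NoStack,
}

fn telemetry_enabled(level: UsageStatsLevel, is_container: bool) -> bool {
    matches!(
        (level, is_container),
        (UsageStatsLevel::On | UsageStatsLevel::NoStack, true)
    )
}

fn main() {
    assert!(telemetry_enabled(UsageStatsLevel::NoStack, true));
    assert!(!telemetry_enabled(UsageStatsLevel::On, false)); // not running in a container
    assert!(!telemetry_enabled(UsageStatsLevel::Off, true)); // user opted out
}
```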
@@ -1780,21 +1777,18 @@ pub async fn run(
                Ok(())
            }
            Err(e) => {
-               if !disable_crash_reports {
-                   let error_event = usage_stats::UsageStatsEvent::new(
-                       ua.clone(),
-                       usage_stats::EventType::Error,
-                       Some(e.to_string()),
-                   );
-                   error_event.send().await;
-               } else {
-                   let unknow_error_event = usage_stats::UsageStatsEvent::new(
-                       ua.clone(),
-                       usage_stats::EventType::Error,
-                       Some("unknow_error".to_string()),
-                   );
-                   unknow_error_event.send().await;
-               }
+               let description = match usage_stats_level {
+                   usage_stats::UsageStatsLevel::On => Some(e.to_string()),
+                   usage_stats::UsageStatsLevel::NoStack => Some("unknow_error".to_string()),
+                   _ => None,
+               };
+               let event = usage_stats::UsageStatsEvent::new(
+                   ua.clone(),
+                   usage_stats::EventType::Error,
+                   description,
+               );
+               event.send().await;
+
                Err(e)
            }
        }

@@ -1,4 +1,5 @@
 use crate::config::Config;
+use clap::ValueEnum;
 use csv::ReaderBuilder;
 use reqwest::header::HeaderMap;
 use serde::Serialize;

@@ -13,6 +14,13 @@ use uuid::Uuid;

 const TELEMETRY_URL: &str = "https://huggingface.co/api/telemetry/tgi";

+#[derive(Copy, Clone, Debug, Serialize, ValueEnum)]
+pub enum UsageStatsLevel {
+    On,
+    NoStack,
+    Off,
+}
+
 #[derive(Debug, Clone, Serialize)]
 pub struct UserAgent {
     pub uid: String,

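Because the new `usage_stats_level` field lives in the `Serialize`d `Args` struct below, the chosen level rides along in the telemetry payload. A standalone sketch, not part of the commit and assuming the `serde` and `serde_json` crates, of how such a fieldless variant serializes:

```rust
// Standalone sketch (not in the diff): a plain `#[derive(Serialize)]` on a
// fieldless enum serializes each variant as its name, so the level appears
// in the reported args as e.g. "NoStack".
use serde::Serialize;

#[allow(dead_code)]
#[derive(Copy, Clone, Debug, Serialize)]
enum UsageStatsLevel {
    On,
    NoStack,
    Off,
}

fn main() -> Result<(), serde_json::Error> {
    let json = serde_json::to_string(&UsageStatsLevel::NoStack)?;
    assert_eq!(json, r#""NoStack""#);
    println!("{json}");
    Ok(())
}
```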
@@ -71,7 +79,7 @@ impl UsageStatsEvent {
 #[derive(Debug, Clone, Serialize)]
 pub struct Args {
     model_config: Option<Config>,
-    tokenizer_config: Option<String>,
+    tokenizer_class: Option<String>,
     max_concurrent_requests: usize,
     max_best_of: usize,
     max_stop_sequences: usize,

@@ -88,15 +96,14 @@ pub struct Args {
     messages_api_enabled: bool,
     disable_grammar_support: bool,
     max_client_batch_size: usize,
-    disable_usage_stats: bool,
-    disable_crash_reports: bool,
+    usage_stats_level: UsageStatsLevel,
 }

 impl Args {
     #[allow(clippy::too_many_arguments)]
     pub fn new(
         model_config: Option<Config>,
-        tokenizer_config: Option<String>,
+        tokenizer_class: Option<String>,
         max_concurrent_requests: usize,
         max_best_of: usize,
         max_stop_sequences: usize,

@@ -113,12 +120,11 @@ impl Args {
         messages_api_enabled: bool,
         disable_grammar_support: bool,
         max_client_batch_size: usize,
-        disable_usage_stats: bool,
-        disable_crash_reports: bool,
+        usage_stats_level: UsageStatsLevel,
     ) -> Self {
         Self {
             model_config,
-            tokenizer_config,
+            tokenizer_class,
             max_concurrent_requests,
             max_best_of,
             max_stop_sequences,

@@ -135,8 +141,7 @@ impl Args {
             messages_api_enabled,
             disable_grammar_support,
             max_client_batch_size,
-            disable_usage_stats,
-            disable_crash_reports,
+            usage_stats_level,
         }
     }
 }