feat(router): log input/output at debug level (#364)

@njhill FYI
This commit is contained in:
OlivierDehaene 2023-05-23 20:47:37 +02:00 committed by GitHub
parent e3e487dc71
commit 942005386a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 7 deletions

View File

@ -231,8 +231,11 @@ def launcher(event_loop):
if quantize: if quantize:
args.append("--quantize") args.append("--quantize")
env = os.environ
env["LOG_LEVEL"] = "info,text_generation_router=debug"
with subprocess.Popen( with subprocess.Popen(
args, stdout=subprocess.PIPE, stderr=subprocess.PIPE args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
) as process: ) as process:
yield ProcessLauncherHandle(process, port) yield ProcessLauncherHandle(process, port)
@ -271,7 +274,7 @@ def launcher(event_loop):
gpu_count = num_shard if num_shard is not None else 1 gpu_count = num_shard if num_shard is not None else 1
env = {} env = {"LOG_LEVEL": "info,text_generation_router=debug"}
if HUGGING_FACE_HUB_TOKEN is not None: if HUGGING_FACE_HUB_TOKEN is not None:
env["HUGGING_FACE_HUB_TOKEN"] = HUGGING_FACE_HUB_TOKEN env["HUGGING_FACE_HUB_TOKEN"] = HUGGING_FACE_HUB_TOKEN

View File

@ -52,7 +52,7 @@ use utoipa_swagger_ui::SwaggerUi;
example = json ! ({"error": "Incomplete generation"})), example = json ! ({"error": "Incomplete generation"})),
) )
)] )]
#[instrument(skip(infer))] #[instrument(skip(infer, req))]
async fn compat_generate( async fn compat_generate(
default_return_full_text: Extension<bool>, default_return_full_text: Extension<bool>,
infer: Extension<Infer>, infer: Extension<Infer>,
@ -133,8 +133,9 @@ async fn health(mut health: Extension<Health>) -> Result<(), (StatusCode, Json<E
) )
)] )]
#[instrument( #[instrument(
skip(infer), skip_all,
fields( fields(
parameters = ?req.0.parameters,
total_time, total_time,
validation_time, validation_time,
queue_time, queue_time,
@ -151,6 +152,8 @@ async fn generate(
let start_time = Instant::now(); let start_time = Instant::now();
metrics::increment_counter!("tgi_request_count"); metrics::increment_counter!("tgi_request_count");
tracing::debug!("Input: {}", req.0.inputs);
let compute_characters = req.0.inputs.chars().count(); let compute_characters = req.0.inputs.chars().count();
let mut add_prompt = None; let mut add_prompt = None;
if req.0.parameters.return_full_text.unwrap_or(false) { if req.0.parameters.return_full_text.unwrap_or(false) {
@ -282,7 +285,8 @@ async fn generate(
output_text = prompt + &output_text; output_text = prompt + &output_text;
} }
tracing::info!("Output: {}", output_text); tracing::debug!("Output: {}", output_text);
tracing::info!("Success");
let response = GenerateResponse { let response = GenerateResponse {
generated_text: output_text, generated_text: output_text,
@ -315,8 +319,9 @@ async fn generate(
) )
)] )]
#[instrument( #[instrument(
skip(infer), skip_all,
fields( fields(
parameters = ?req.0.parameters,
total_time, total_time,
validation_time, validation_time,
queue_time, queue_time,
@ -336,6 +341,8 @@ async fn generate_stream(
let start_time = Instant::now(); let start_time = Instant::now();
metrics::increment_counter!("tgi_request_count"); metrics::increment_counter!("tgi_request_count");
tracing::debug!("Input: {}", req.0.inputs);
let compute_characters = req.0.inputs.chars().count(); let compute_characters = req.0.inputs.chars().count();
let mut headers = HeaderMap::new(); let mut headers = HeaderMap::new();
@ -370,6 +377,8 @@ async fn generate_stream(
InferStreamResponse::Prefill(_) => {} InferStreamResponse::Prefill(_) => {}
// Yield event for every new token // Yield event for every new token
InferStreamResponse::Token(token) => { InferStreamResponse::Token(token) => {
tracing::debug!(parent: &span, "Token: {:?}", token);
// StreamResponse // StreamResponse
let stream_token = StreamResponse { let stream_token = StreamResponse {
token, token,
@ -428,7 +437,8 @@ async fn generate_stream(
output_text = prompt + &output_text; output_text = prompt + &output_text;
} }
tracing::info!(parent: &span, "Output: {}", output_text); tracing::debug!(parent: &span, "Output: {}", output_text);
tracing::info!(parent: &span, "Success");
let stream_token = StreamResponse { let stream_token = StreamResponse {
token, token,