diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 3086ecd..7db1242 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -231,8 +231,13 @@ def launcher(event_loop):
         if quantize:
             args.append("--quantize")
 
+        # Copy the environment so the LOG_LEVEL override only reaches the
+        # launched process instead of mutating os.environ for the whole run.
+        env = os.environ.copy()
+        env["LOG_LEVEL"] = "info,text_generation_router=debug"
+
         with subprocess.Popen(
-            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+            args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
         ) as process:
             yield ProcessLauncherHandle(process, port)
 
@@ -271,7 +276,7 @@ def launcher(event_loop):
 
         gpu_count = num_shard if num_shard is not None else 1
 
-        env = {}
+        env = {"LOG_LEVEL": "info,text_generation_router=debug"}
         if HUGGING_FACE_HUB_TOKEN is not None:
             env["HUGGING_FACE_HUB_TOKEN"] = HUGGING_FACE_HUB_TOKEN
 
diff --git a/router/src/server.rs b/router/src/server.rs
index 162b6fd..fd6a66b 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -52,7 +52,7 @@ use utoipa_swagger_ui::SwaggerUi;
             example = json ! ({"error": "Incomplete generation"})),
 )
 )]
-#[instrument(skip(infer))]
+#[instrument(skip(infer, req))]
 async fn compat_generate(
     default_return_full_text: Extension,
     infer: Extension,
@@ -133,8 +133,9 @@ async fn health(mut health: Extension) -> Result<(), (StatusCode, Json {}
                     // Yield event for every new token
                     InferStreamResponse::Token(token) => {
+                        tracing::debug!(parent: &span, "Token: {:?}", token);
+
                         // StreamResponse
                         let stream_token = StreamResponse {
                             token,
@@ -428,7 +437,8 @@ async fn generate_stream(
                             output_text = prompt + &output_text;
                         }
 
-                        tracing::info!(parent: &span, "Output: {}", output_text);
+                        tracing::debug!(parent: &span, "Output: {}", output_text);
+                        tracing::info!(parent: &span, "Success");
 
                         let stream_token = StreamResponse {
                             token,