From 0c3ba932ccff20e037f24bc55bd9f62b2210025e Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz <morgan@huggingface.co>
Date: Thu, 10 Oct 2024 14:11:25 +0000
Subject: [PATCH] (misc): disable logging in release mode

---
 backends/trtllm/lib/backend.cpp | 14 ++++----------
 backends/trtllm/src/ffi.cpp     |  2 ++
 2 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/backends/trtllm/lib/backend.cpp b/backends/trtllm/lib/backend.cpp
index 16f45f5d..96f5f9f4 100644
--- a/backends/trtllm/lib/backend.cpp
+++ b/backends/trtllm/lib/backend.cpp
@@ -106,7 +106,7 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
 size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
     const auto numResponses = executor.getNumResponsesReady();
 
-#ifdef NDEBUG
+#ifndef NDEBUG
     if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
 #endif
 
@@ -124,13 +124,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
         const float_t frequency_penalty,
         const uint64_t seed
 ) {
-#ifdef NDEBUG
-    SPDLOG_DEBUG(
-            FMT_STRING("Submitting inference over {:d} tokens to the executor ({:d} already in-flight)"),
-            tokens.size(),
-            executor.getLatestIterationStats().back().numActiveRequests
-    );
-#else
+#ifndef NDEBUG
     SPDLOG_DEBUG(
             FMT_STRING("Submitting inference [{}] to the executor ({:d} already in-flight)"),
             fmt::join(tokens, ", "),
@@ -142,7 +136,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
     const auto maxNewTokensChecked = static_cast<tle::SizeType32>(
             std::min(maxNewTokens, static_cast<uint32_t>(maxNumTokens - tokens.size())));
 
-#ifdef NDEBUG
+#ifndef NDEBUG
     SPDLOG_INFO(
         FMT_STRING("Sampling config: topK={:d}, topP={:d}, temperature={:d}, repetition_penalty={:d}, frequency_penalty={:d}, seed={:d}"),
         topK, topP, temperature, repetition_penalty, frequency_penalty, seed
@@ -156,4 +150,4 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
 
 std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::PullNewTokens() {
     return executor.awaitResponses();
-}
+}
\ No newline at end of file
diff --git a/backends/trtllm/src/ffi.cpp b/backends/trtllm/src/ffi.cpp
index 54c17bc4..80e74cf7 100644
--- a/backends/trtllm/src/ffi.cpp
+++ b/backends/trtllm/src/ffi.cpp
@@ -40,7 +40,9 @@ huggingface::tgi::backends::TensorRtLlmBackendImpl::PullTokens() {
     auto steps = std::make_unique<std::vector<GenerationStep>>();
     steps->reserve(responses.size());
 
+#ifndef NDEBUG
     SPDLOG_DEBUG(FMT_STRING("Pulled out {:d} new tokens"), responses->size());
+#endif
 
     // Transform tle::Response to GenerationStep
     std::ranges::transform(responses.begin(), responses.end(), std::back_inserter(*steps), [](const tle::Response &r) {