(misc): disable logging in release mode

This commit is contained in:
Morgan Funtowicz 2024-10-10 14:11:25 +00:00 committed by Morgan Funtowicz
parent 437c2aa142
commit 0c3ba932cc
2 changed files with 6 additions and 10 deletions

View File

@ -106,7 +106,7 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
const auto numResponses = executor.getNumResponsesReady();
#ifdef NDEBUG
#ifndef NDEBUG
if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
#endif
@ -124,13 +124,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
const float_t frequency_penalty,
const uint64_t seed
) {
#ifdef NDEBUG
SPDLOG_DEBUG(
FMT_STRING("Submitting inference over {:d} tokens to the executor ({:d} already in-flight)"),
tokens.size(),
executor.getLatestIterationStats().back().numActiveRequests
);
#else
#ifndef NDEBUG
SPDLOG_DEBUG(
FMT_STRING("Submitting inference [{}] to the executor ({:d} already in-flight)"),
fmt::join(tokens, ", "),
@ -142,7 +136,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
const auto maxNewTokensChecked = static_cast<tle::SizeType32>(
std::min(maxNewTokens, static_cast<uint32_t>(maxNumTokens - tokens.size())));
#ifdef NDEBUG
#ifndef NDEBUG
SPDLOG_INFO(
FMT_STRING("Sampling config: topK={:d}, topP={:d}, temperature={:d}, repetition_penalty={:d}, frequency_penalty={:d}, seed={:d}"),
topK, topP, temperature, repetition_penalty, frequency_penalty, seed
@ -156,4 +150,4 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
/**
 * Fetch the responses currently exposed by the underlying executor.
 *
 * The result of `executor.awaitResponses()` is handed back as-is: no filtering,
 * reordering or conversion is applied here.
 * NOTE(review): presumably this call blocks until the executor has something to
 * report — confirm against the executor API.
 *
 * @return The responses returned by the executor
 */
std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::PullNewTokens() {
    auto pulled = executor.awaitResponses();
    return pulled;
}
}

View File

@ -40,7 +40,9 @@ huggingface::tgi::backends::TensorRtLlmBackendImpl::PullTokens() {
auto steps = std::make_unique<std::vector<GenerationStep>>();
steps->reserve(responses.size());
#ifndef NDEBUG
SPDLOG_DEBUG(FMT_STRING("Pulled out {:d} new tokens"), responses->size());
#endif
// Transform tle::Response to GenerationStep
std::ranges::transform(responses.begin(), responses.end(), std::back_inserter(*steps), [](const tle::Response &r) {