(misc): disable logging in release mode

Author: Morgan Funtowicz, 2024-10-10 14:11:25 +00:00 (committed by Morgan Funtowicz)
Parent: 437c2aa142
Commit: 0c3ba932cc
2 changed files with 6 additions and 10 deletions
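
Context: `NDEBUG` is defined in release builds (CMake's Release configuration passes `-DNDEBUG`), so the previous `#ifdef NDEBUG` guards compiled the verbose logging into release binaries and stripped it from debug ones, the inverse of the intent. The fix flips every guard to `#ifndef NDEBUG`, making the logging debug-only. A minimal standalone sketch of the corrected pattern (illustrative only, not the TGI code itself; `report_ready` is a hypothetical name):

```cpp
#include <spdlog/spdlog.h>

void report_ready(std::size_t num_responses) {
#ifndef NDEBUG
    // Compiled only when NDEBUG is absent, i.e. in debug builds;
    // release binaries contain no trace of this call.
    if (num_responses > 0)
        SPDLOG_INFO("Num responses ready: {:d}", num_responses);
#endif
}
```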

File 1 of 2:

@@ -106,7 +106,7 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
 size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
     const auto numResponses = executor.getNumResponsesReady();
-#ifdef NDEBUG
+#ifndef NDEBUG
     if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
 #endif
@@ -124,13 +124,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
         const float_t frequency_penalty,
         const uint64_t seed
 ) {
-#ifdef NDEBUG
-    SPDLOG_DEBUG(
-        FMT_STRING("Submitting inference over {:d} tokens to the executor ({:d} already in-flight)"),
-        tokens.size(),
-        executor.getLatestIterationStats().back().numActiveRequests
-    );
-#else
+#ifndef NDEBUG
     SPDLOG_DEBUG(
         FMT_STRING("Submitting inference [{}] to the executor ({:d} already in-flight)"),
         fmt::join(tokens, ", "),
@@ -142,7 +136,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
     const auto maxNewTokensChecked = static_cast<tle::SizeType32>(
             std::min(maxNewTokens, static_cast<uint32_t>(maxNumTokens - tokens.size())));
-#ifdef NDEBUG
+#ifndef NDEBUG
     SPDLOG_INFO(
         FMT_STRING("Sampling config: topK={:d}, topP={:d}, temperature={:d}, repetition_penalty={:d}, frequency_penalty={:d}, seed={:d}"),
         topK, topP, temperature, repetition_penalty, frequency_penalty, seed
@@ -156,4 +150,4 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
 std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::PullNewTokens() {
     return executor.awaitResponses();
 }
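
Beyond flipping the guard, the second hunk also deletes the now-pointless `#else` branch: the old code kept two near-identical `SPDLOG_DEBUG` calls, one per build mode, and only the variant that prints the full token list survives under the debug-only guard. Note that spdlog can also discard low-level log macros at compile time via `SPDLOG_ACTIVE_LEVEL`; a sketch of that standard mechanism, shown as a complementary option rather than something this commit uses:

```cpp
// SPDLOG_ACTIVE_LEVEL must be defined before including spdlog; any macro
// below the chosen level (here SPDLOG_DEBUG and SPDLOG_TRACE) compiles
// to a no-op, independent of NDEBUG.
#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_INFO
#include <spdlog/spdlog.h>

int main() {
    SPDLOG_DEBUG("compiled out: below the active level");
    SPDLOG_INFO("compiled in: at or above the active level");
}
```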

File 2 of 2:

@@ -40,7 +40,9 @@ huggingface::tgi::backends::TensorRtLlmBackendImpl::PullTokens() {
     auto steps = std::make_unique<std::vector<GenerationStep>>();
     steps->reserve(responses.size());
 
+#ifndef NDEBUG
     SPDLOG_DEBUG(FMT_STRING("Pulled out {:d} new tokens"), responses->size());
+#endif
 
     // Transform tle::Response to GenerationStep
     std::ranges::transform(responses.begin(), responses.end(), std::back_inserter(*steps), [](const tle::Response &r) {
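
To sanity-check the guard's behaviour in both build modes, a tiny probe can be compiled with and without `-DNDEBUG` (assuming g++ here; CMake's Release configuration adds the define the same way):

```cpp
// g++ -std=c++17 probe.cpp && ./a.out           -> "debug build: logging on"
// g++ -std=c++17 -DNDEBUG probe.cpp && ./a.out  -> "release build: logging off"
#include <cstdio>

int main() {
#ifndef NDEBUG
    std::puts("debug build: logging on");    // guard active, NDEBUG undefined
#else
    std::puts("release build: logging off"); // NDEBUG defined, logging stripped
#endif
}
```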