(misc): disable logging in release mode
This commit is contained in:
parent
437c2aa142
commit
0c3ba932cc
|
@ -106,7 +106,7 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
|
||||||
size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
|
size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
|
||||||
const auto numResponses = executor.getNumResponsesReady();
|
const auto numResponses = executor.getNumResponsesReady();
|
||||||
|
|
||||||
#ifdef NDEBUG
|
#ifndef NDEBUG
|
||||||
if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
|
if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -124,13 +124,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
|
||||||
const float_t frequency_penalty,
|
const float_t frequency_penalty,
|
||||||
const uint64_t seed
|
const uint64_t seed
|
||||||
) {
|
) {
|
||||||
#ifdef NDEBUG
|
#ifndef NDEBUG
|
||||||
SPDLOG_DEBUG(
|
|
||||||
FMT_STRING("Submitting inference over {:d} tokens to the executor ({:d} already in-flight)"),
|
|
||||||
tokens.size(),
|
|
||||||
executor.getLatestIterationStats().back().numActiveRequests
|
|
||||||
);
|
|
||||||
#else
|
|
||||||
SPDLOG_DEBUG(
|
SPDLOG_DEBUG(
|
||||||
FMT_STRING("Submitting inference [{}] to the executor ({:d} already in-flight)"),
|
FMT_STRING("Submitting inference [{}] to the executor ({:d} already in-flight)"),
|
||||||
fmt::join(tokens, ", "),
|
fmt::join(tokens, ", "),
|
||||||
|
@ -142,7 +136,7 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
|
||||||
const auto maxNewTokensChecked = static_cast<tle::SizeType32>(
|
const auto maxNewTokensChecked = static_cast<tle::SizeType32>(
|
||||||
std::min(maxNewTokens, static_cast<uint32_t>(maxNumTokens - tokens.size())));
|
std::min(maxNewTokens, static_cast<uint32_t>(maxNumTokens - tokens.size())));
|
||||||
|
|
||||||
#ifdef NDEBUG
|
#ifndef NDEBUG
|
||||||
SPDLOG_INFO(
|
SPDLOG_INFO(
|
||||||
FMT_STRING("Sampling config: topK={:d}, topP={:d}, temperature={:d}, repetition_penalty={:d}, frequency_penalty={:d}, seed={:d}"),
|
FMT_STRING("Sampling config: topK={:d}, topP={:d}, temperature={:d}, repetition_penalty={:d}, frequency_penalty={:d}, seed={:d}"),
|
||||||
topK, topP, temperature, repetition_penalty, frequency_penalty, seed
|
topK, topP, temperature, repetition_penalty, frequency_penalty, seed
|
||||||
|
@ -156,4 +150,4 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
|
||||||
|
|
||||||
std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::PullNewTokens() {
|
std::vector<tle::Response> huggingface::tgi::backends::TensorRtLlmBackend::PullNewTokens() {
|
||||||
return executor.awaitResponses();
|
return executor.awaitResponses();
|
||||||
}
|
}
|
|
@ -40,7 +40,9 @@ huggingface::tgi::backends::TensorRtLlmBackendImpl::PullTokens() {
|
||||||
auto steps = std::make_unique<std::vector<GenerationStep>>();
|
auto steps = std::make_unique<std::vector<GenerationStep>>();
|
||||||
steps->reserve(responses.size());
|
steps->reserve(responses.size());
|
||||||
|
|
||||||
|
#ifndef NDEBUG
|
||||||
SPDLOG_DEBUG(FMT_STRING("Pulled out {:d} new tokens"), responses->size());
|
SPDLOG_DEBUG(FMT_STRING("Pulled out {:d} new tokens"), responses->size());
|
||||||
|
#endif
|
||||||
|
|
||||||
// Transform tle::Response to GenerationStep
|
// Transform tle::Response to GenerationStep
|
||||||
std::ranges::transform(responses.begin(), responses.end(), std::back_inserter(*steps), [](const tle::Response &r) {
|
std::ranges::transform(responses.begin(), responses.end(), std::back_inserter(*steps), [](const tle::Response &r) {
|
||||||
|
|
Loading…
Reference in New Issue