chore(trtllm): remove unused method

This commit is contained in:
Morgan Funtowicz 2024-10-21 14:10:23 +02:00
parent fb00f985ae
commit 31747163e7
2 changed files with 7 additions and 23 deletions

View File

@@ -72,12 +72,6 @@ namespace huggingface::tgi::backends {
             const std::filesystem::path &executorWorker
     );
-    /**
-     * Query the executor for the number of token available for pulling
-     * @return
-     */
-    [[nodiscard]] size_t NumResponsesReady() const;
     /**
      * Submit a new generation task to the executor
      * @param tokens

View File

@@ -102,17 +102,6 @@ huggingface::tgi::backends::TensorRtLlmBackend::TensorRtLlmBackend(
     SPDLOG_INFO(FMT_STRING("Engine (version={})"), config["/version"_json_pointer].get_ref<const std::string &>());
 }
-[[nodiscard("Returned number of requests needs to be consumed")]]
-size_t huggingface::tgi::backends::TensorRtLlmBackend::NumResponsesReady() const {
-    const auto numResponses = executor.getNumResponsesReady();
-#ifndef NDEBUG
-    if(numResponses > 0) SPDLOG_INFO(FMT_STRING("Num responses ready: {:d}"), numResponses);
-#endif
-    return numResponses;
-}
 [[nodiscard("Returned request id needs to be provided back to gather generated tokens")]]
 tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
     const std::vector<tle::TokenIdType> &tokens,
@@ -138,7 +127,8 @@ tle::IdType huggingface::tgi::backends::TensorRtLlmBackend::Submit(
 #ifndef NDEBUG
     SPDLOG_INFO(
-            FMT_STRING("Sampling config: topK={:d}, topP={:d}, temperature={:d}, repetition_penalty={:d}, frequency_penalty={:d}, seed={:d}"),
+            FMT_STRING(
+                    "Sampling config: topK={:d}, topP={:d}, temperature={:d}, repetition_penalty={:d}, frequency_penalty={:d}, seed={:d}"),
             topK, topP, temperature, repetition_penalty, frequency_penalty, seed
     )
     SPDLOG_INFO(FMT_STRING("Asking for max_new_tokens={:d}"), maxNewTokensChecked);