diff --git a/backends/trtllm/csrc/backend.cpp b/backends/trtllm/csrc/backend.cpp index 5e52261e..b50044d8 100644 --- a/backends/trtllm/csrc/backend.cpp +++ b/backends/trtllm/csrc/backend.cpp @@ -7,7 +7,7 @@ #include "hardware.hpp" namespace huggingface::tgi::backends::trtllm { - constexpr tle::ParallelConfig backend_workspace_t::parallel_config() const { + tle::ParallelConfig backend_workspace_t::parallel_config() const { // Single engine (TP = PP = 1) -> using leader mode (no MPI involved) const auto world_size = config_["/pretrained_config/mapping/world_size"_json_pointer].get(); diff --git a/backends/trtllm/csrc/backend.hpp b/backends/trtllm/csrc/backend.hpp index 5a08b2d4..61aa11d0 100644 --- a/backends/trtllm/csrc/backend.hpp +++ b/backends/trtllm/csrc/backend.hpp @@ -127,7 +127,7 @@ namespace huggingface::tgi::backends::trtllm { * to initialize `tensorrt_llm::executor::Executor` with multi-instance communication information * @return `tensorrt_llm::executor::ParallelConfig` instance */ - [[nodiscard]] constexpr tle::ParallelConfig parallel_config() const; + [[nodiscard]] tle::ParallelConfig parallel_config() const; /** * Factory method returning new `tensorrt_llm::executor::ExecutorConfig` instance used