diff --git a/backends/llamacpp/csrc/backend.cpp b/backends/llamacpp/csrc/backend.cpp
index f7e4cde2..b6b3de00 100644
--- a/backends/llamacpp/csrc/backend.cpp
+++ b/backends/llamacpp/csrc/backend.cpp
@@ -125,6 +125,13 @@ namespace huggingface::tgi::backends::llamacpp {
                 // Update the batch for the next generation
                 sampling_index = update_batch_for_decoding(batch, new_token_id, position);
                 position += 1;
+            } else {
+                // llama_decode returns 1 when no KV cache slot was found for the batch
+                if (status == 1) {
+                    return backend_error_t::NO_KV_SLOT_AVAILABLE;
+                } else {
+                    return backend_error_t::DECODING_ERROR;
+                }
             }
         }
 
diff --git a/backends/llamacpp/csrc/backend.hpp b/backends/llamacpp/csrc/backend.hpp
index 38fd3aad..e1ab1e65 100644
--- a/backends/llamacpp/csrc/backend.hpp
+++ b/backends/llamacpp/csrc/backend.hpp
@@ -37,7 +37,9 @@ namespace huggingface::tgi::backends::llamacpp {
      */
     enum backend_error_t : uint8_t {
         // Provided model filepath doesnt exist
-        MODEL_FILE_DOESNT_EXIST = 1
+        MODEL_FILE_DOESNT_EXIST = 1,
+        NO_KV_SLOT_AVAILABLE = 2,
+        DECODING_ERROR = 3
     };
 
     /**
diff --git a/backends/llamacpp/csrc/ffi.hpp b/backends/llamacpp/csrc/ffi.hpp
index 99679fdb..2f143739 100644
--- a/backends/llamacpp/csrc/ffi.hpp
+++ b/backends/llamacpp/csrc/ffi.hpp
@@ -32,7 +32,6 @@ namespace huggingface::tgi::backends::llamacpp {
 #include "backends/llamacpp/src/lib.rs.h"
 #include "rust/cxx.h"
 
-
 namespace huggingface::tgi::backends::llamacpp {
 
     /**
@@ -56,7 +55,12 @@ namespace huggingface::tgi::backends::llamacpp {
      * llama.cpp backend specific exception mapped from `backend_exception_t` to throw at the FFI level and
      * allow automatic implementation of Result<_, Exception> from C++ to Rust
      */
-    class llama_cpp_backend_exception_t : std::exception {};
+    class llama_cpp_backend_exception_t : std::exception {
+    public:
+        backend_error_t error;
+
+        llama_cpp_backend_exception_t(const backend_error_t error): error(error) {};
+    };
 
     /**
      * Llama.cpp frontend over the worker interfacing with Rust FFI layer
@@ -119,7 +123,7 @@ namespace huggingface::tgi::backends::llamacpp {
             if(const auto result = worker_.generate(generation_context, context_forwarding_callback); result.has_value()) [[likely]] {
                 return *result;
             } else {
-                throw llama_cpp_backend_exception_t {};
+                throw llama_cpp_backend_exception_t(result.error());
             }
         }
     };
@@ -232,5 +236,27 @@ namespace huggingface::tgi::backends::llamacpp {
     }
 }
+// Error handler converting the C++ exception into a Rust Result
+template <typename Try, typename Fail>
+static void trycatch(Try &&func, Fail &&fail) noexcept try {
+    func();
+} catch (const huggingface::tgi::backends::llamacpp::llama_cpp_backend_exception_t &e) {
+    switch (e.error) {
+        case huggingface::tgi::backends::llamacpp::backend_error_t::MODEL_FILE_DOESNT_EXIST: {
+            fail("Specified model path doesn't exist.");
+            break;
+        }
+        case huggingface::tgi::backends::llamacpp::backend_error_t::NO_KV_SLOT_AVAILABLE: {
+            fail("Keys/Values cache is full, no slot available for the new batch.");
+            break;
+        }
+        case huggingface::tgi::backends::llamacpp::backend_error_t::DECODING_ERROR: {
+            fail("An error was detected during the generation.");
+            break;
+        }
+    }
+}
+
+
 
 
 #endif //TGI_LLAMA_CPP_BACKEND_FFI_HPP
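
Note: the sketch below is a minimal, standalone illustration of the error-propagation pattern this diff introduces (a std::expected result, an exception carrying backend_error_t across the FFI boundary, and a per-variant error message), not code from the TGI sources. It assumes a C++23 compiler for std::expected; demo_backend_exception_t, fake_generate and report are hypothetical stand-ins for llama_cpp_backend_exception_t, worker_t::generate and the cxx fail callback.

    // Standalone sketch only: mirrors the expected -> exception -> message
    // flow added in this diff. Hypothetical names do not exist in TGI.
    #include <cstddef>
    #include <cstdint>
    #include <exception>
    #include <expected>
    #include <functional>
    #include <initializer_list>
    #include <iostream>
    #include <string_view>

    enum backend_error_t : uint8_t {
        MODEL_FILE_DOESNT_EXIST = 1,
        NO_KV_SLOT_AVAILABLE = 2,
        DECODING_ERROR = 3
    };

    // Same shape as llama_cpp_backend_exception_t: carries the error code
    // across a boundary where only exceptions (not std::expected) can travel.
    struct demo_backend_exception_t : std::exception {
        backend_error_t error;
        explicit demo_backend_exception_t(backend_error_t error) : error(error) {}
    };

    // Hypothetical stand-in for worker_t::generate: status 1 plays the role of
    // llama_decode's "no KV slot found", any other non-zero value a hard failure.
    std::expected<std::size_t, backend_error_t> fake_generate(int status) {
        if (status == 0) return std::size_t{42};  // pretend 42 tokens were generated
        if (status == 1) return std::unexpected(NO_KV_SLOT_AVAILABLE);
        return std::unexpected(DECODING_ERROR);
    }

    // Counterpart of the trycatch shim: map each enum value back to a message.
    void report(backend_error_t error, const std::function<void(std::string_view)> &fail) {
        switch (error) {
            case MODEL_FILE_DOESNT_EXIST:
                fail("Specified model path doesn't exist.");
                break;
            case NO_KV_SLOT_AVAILABLE:
                fail("KV cache is full, no slot available for the new batch.");
                break;
            case DECODING_ERROR:
                fail("An error was detected during generation.");
                break;
        }
    }

    int main() {
        for (int status : {0, 1, -1}) {
            try {
                // Same shape as the frontend change in ffi.hpp: unwrap on
                // success, rethrow the error code as an exception otherwise.
                if (const auto result = fake_generate(status); result.has_value()) {
                    std::cout << "generated " << *result << " tokens\n";
                } else {
                    throw demo_backend_exception_t(result.error());
                }
            } catch (const demo_backend_exception_t &e) {
                report(e.error, [](std::string_view msg) { std::cerr << msg << '\n'; });
            }
        }
    }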