feat(backend): better mapping of exceptions thrown on the C++ side
parent db41776a0e
commit c9f6c3a8f7
@@ -125,6 +125,12 @@ namespace huggingface::tgi::backends::llamacpp {
                 // Update the batch for the next generation
                 sampling_index = update_batch_for_decoding(batch, new_token_id, position);
                 position += 1;
+            } else {
+                if (status == 1) {
+                    return backend_error_t::NO_KV_SLOT_AVAILABLE;
+                } else {
+                    return backend_error_t::DECODING_ERROR;
+                }
             }
         }
 
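Context for the new branch: llama.cpp's llama_decode conventionally returns 0 on success, 1 when no slot could be found in the KV cache for the submitted batch, and other non-zero values on hard failures; the hunk maps those two failure classes onto backend_error_t. A minimal self-contained sketch of the same mapping (the helper name and surrounding setup are illustrative, not part of the commit):

#include <cstdint>
#include <optional>

enum backend_error_t : uint8_t {
    MODEL_FILE_DOESNT_EXIST = 1,
    NO_KV_SLOT_AVAILABLE = 2,
    DECODING_ERROR = 3
};

// Translate a llama_decode-style status code into the backend's error enum;
// std::nullopt means the decode step succeeded.
std::optional<backend_error_t> map_decode_status(const int status) {
    if (status == 0) return std::nullopt;                           // success
    if (status == 1) return backend_error_t::NO_KV_SLOT_AVAILABLE;  // cache exhausted
    return backend_error_t::DECODING_ERROR;                         // any other failure
}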
@@ -37,7 +37,9 @@ namespace huggingface::tgi::backends::llamacpp {
      */
     enum backend_error_t : uint8_t {
         // Provided model filepath doesn't exist
-        MODEL_FILE_DOESNT_EXIST = 1
+        MODEL_FILE_DOESNT_EXIST = 1,
+        NO_KV_SLOT_AVAILABLE = 2,
+        DECODING_ERROR = 3
     };
 
     /**
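If backend_error_t is shared with the Rust side through the cxx bridge (the lib.rs.h include in the next hunk suggests one is in play), its numeric discriminants and uint8_t backing are effectively part of the FFI contract. A purely illustrative compile-time guard, reusing the enum from the sketch above:

#include <cstdint>
#include <type_traits>

static_assert(std::is_same_v<std::underlying_type_t<backend_error_t>, std::uint8_t>,
              "backend_error_t must stay uint8_t to match the Rust-side declaration");
static_assert(backend_error_t::NO_KV_SLOT_AVAILABLE == 2 && backend_error_t::DECODING_ERROR == 3,
              "error discriminants cross the FFI boundary and must not change");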
@@ -32,7 +32,6 @@ namespace huggingface::tgi::backends::llamacpp {
 #include "backends/llamacpp/src/lib.rs.h"
 #include "rust/cxx.h"
-
 
 namespace huggingface::tgi::backends::llamacpp {
 
     /**
@@ -56,7 +55,12 @@ namespace huggingface::tgi::backends::llamacpp {
      * llama.cpp backend specific exception mapped from `backend_error_t` to throw at the FFI level and
      * allow automatic implementation of Result<_, Exception> from C++ to Rust
      */
-    class llama_cpp_backend_exception_t : std::exception {};
+    class llama_cpp_backend_exception_t : std::exception {
+    public:
+        backend_error_t error;
+
+        llama_cpp_backend_exception_t(const backend_error_t error): error(error) {}
+    };
 
     /**
      * Llama.cpp frontend over the worker interfacing with Rust FFI layer
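With the payload in place, a catch site can recover exactly which failure occurred instead of only learning that something threw. A minimal round-trip sketch (the demo function is hypothetical; the real consumer is the trycatch handler in the last hunk below):

#include <cstdio>

void demo_throw_catch() {
    try {
        throw llama_cpp_backend_exception_t(backend_error_t::NO_KV_SLOT_AVAILABLE);
    } catch (const llama_cpp_backend_exception_t &e) {
        // The error code survives the unwind and can drive a precise message
        std::printf("backend error code: %d\n", static_cast<int>(e.error));
    }
}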
@@ -119,7 +123,7 @@ namespace huggingface::tgi::backends::llamacpp {
             if(const auto result = worker_.generate(generation_context, context_forwarding_callback); result.has_value()) [[likely]] {
                 return *result;
             } else {
-                throw llama_cpp_backend_exception_t {};
+                throw llama_cpp_backend_exception_t(result.error());
             }
         }
     };
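The throw site assumes worker_.generate returns a value-or-error type exposing has_value() and error(), i.e. the std::expected<T, E> shape (whether the backend uses C++23 std::expected or an equivalent is an assumption here). A sketch of the same pattern with a hypothetical stand-in generator, reusing the types from the hunks above:

#include <cstddef>
#include <expected>

// Hypothetical stand-in for worker_t::generate: either a token count
// or a backend_error_t describing the failure.
std::expected<std::size_t, backend_error_t> fake_generate(const bool succeed) {
    if (succeed) return std::size_t{42};
    return std::unexpected(backend_error_t::DECODING_ERROR);
}

std::size_t generate_or_throw(const bool succeed) {
    if (const auto result = fake_generate(succeed); result.has_value()) [[likely]] {
        return *result;
    } else {
        // Same conversion as in the hunk above: surface the error as an exception
        throw llama_cpp_backend_exception_t(result.error());
    }
}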
@@ -232,5 +236,32 @@ namespace huggingface::tgi::backends::llamacpp {
     }
 }
 
+// Error handler converting C++ exceptions to a Rust Result<T, CxxError>
+template <typename Try, typename Fail>
+static void trycatch(Try &&func, Fail &&fail) noexcept try {
+    func();
+} catch (const huggingface::tgi::backends::llamacpp::llama_cpp_backend_exception_t &e) {
+    switch (e.error) {
+        case huggingface::tgi::backends::llamacpp::backend_error_t::MODEL_FILE_DOESNT_EXIST: {
+            fail("Specified model path doesn't exist.");
+            break;
+        }
+        case huggingface::tgi::backends::llamacpp::backend_error_t::NO_KV_SLOT_AVAILABLE: {
+            fail("Keys/Values cache is full, no slot available for the new batch.");
+            break;
+        }
+        case huggingface::tgi::backends::llamacpp::backend_error_t::DECODING_ERROR: {
+            fail("An error was detected during the generation.");
+            break;
+        }
+        // Fallback: never swallow an unmapped error silently
+        default: {
+            fail("Unknown error raised on the C++ backend side.");
+            break;
+        }
+    }
+}
+
+
 #endif //TGI_LLAMA_CPP_BACKEND_FFI_HPP
 
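Why a free function named trycatch changes anything: the cxx crate routes every bridged C++ call that may throw through a trycatch customization point, and the string handed to fail(...) becomes the Err variant of the Rust-side Result (see https://cxx.rs/binding/result.html). Per the cxx documentation the override lives in namespace rust::behavior; the extraction above does not show an enclosing namespace, so the exact placement in this file is inferred, not confirmed. For comparison, the default shape from the cxx documentation:

#include <exception>

namespace rust {
namespace behavior {

// Default behavior per the cxx docs: run the bridged call and convert any
// std::exception into an Err carrying the what() message.
template <typename Try, typename Fail>
static void trycatch(Try &&func, Fail &&fail) noexcept try {
    func();
} catch (const std::exception &e) {
    fail(e.what());
}

} // namespace behavior
} // namespace rust

The commit's version narrows the catch to llama_cpp_backend_exception_t so each backend_error_t value yields a dedicated message instead of a generic what() string.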