From c9f6c3a8f79d12346372ba786db9be9cd010a40b Mon Sep 17 00:00:00 2001
From: Morgan Funtowicz
Date: Fri, 29 Nov 2024 23:34:16 +0100
Subject: [PATCH] feat(backend): better map exceptions thrown on C++ side

---
 backends/llamacpp/csrc/backend.cpp |  6 ++++++
 backends/llamacpp/csrc/backend.hpp |  4 +++-
 backends/llamacpp/csrc/ffi.hpp     | 34 +++++++++++++++++++++++++++++++---
 3 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/backends/llamacpp/csrc/backend.cpp b/backends/llamacpp/csrc/backend.cpp
index f7e4cde2..b6b3de00 100644
--- a/backends/llamacpp/csrc/backend.cpp
+++ b/backends/llamacpp/csrc/backend.cpp
@@ -125,6 +125,12 @@ namespace huggingface::tgi::backends::llamacpp {
                 // Update the batch for the next generation
                 sampling_index = update_batch_for_decoding(batch, new_token_id, position);
                 position += 1;
+            } else {
+                if (status == 1) {
+                    return backend_error_t::NO_KV_SLOT_AVAILABLE;
+                } else {
+                    return backend_error_t::DECODING_ERROR;
+                }
             }
         }
 
diff --git a/backends/llamacpp/csrc/backend.hpp b/backends/llamacpp/csrc/backend.hpp
index 38fd3aad..e1ab1e65 100644
--- a/backends/llamacpp/csrc/backend.hpp
+++ b/backends/llamacpp/csrc/backend.hpp
@@ -37,7 +37,9 @@ namespace huggingface::tgi::backends::llamacpp {
      */
     enum backend_error_t : uint8_t {
         // Provided model filepath doesnt exist
-        MODEL_FILE_DOESNT_EXIST = 1
+        MODEL_FILE_DOESNT_EXIST = 1,
+        NO_KV_SLOT_AVAILABLE = 2,
+        DECODING_ERROR = 3
     };
 
     /**
diff --git a/backends/llamacpp/csrc/ffi.hpp b/backends/llamacpp/csrc/ffi.hpp
index 99679fdb..2f143739 100644
--- a/backends/llamacpp/csrc/ffi.hpp
+++ b/backends/llamacpp/csrc/ffi.hpp
@@ -32,7 +32,6 @@ namespace huggingface::tgi::backends::llamacpp {
 #include "backends/llamacpp/src/lib.rs.h"
 #include "rust/cxx.h"
 
-
 namespace huggingface::tgi::backends::llamacpp {
 
     /**
@@ -56,7 +55,12 @@
      * llama.cpp backend specific exception mapped from `backend_exception_t` to throw at the FFI level and
      * allow automatic implementation of Result<_, Exception> from C++ to Rust
      */
-    class llama_cpp_backend_exception_t : std::exception {};
+    class llama_cpp_backend_exception_t : public std::exception {
+    public:
+        backend_error_t error;
+
+        llama_cpp_backend_exception_t(const backend_error_t error) : error(error) {}
+    };
 
     /**
      * Llama.cpp frontend over the worker interfacing with Rust FFI layer
@@ -119,7 +123,7 @@
             if(const auto result = worker_.generate(generation_context, context_forwarding_callback); result.has_value()) [[likely]] {
                 return *result;
             } else {
-                throw llama_cpp_backend_exception_t {};
+                throw llama_cpp_backend_exception_t(result.error());
             }
         }
     };
@@ -232,5 +236,29 @@
     }
 }
 
 
+// Map the backend exception into a Rust Result<_, Exception>; cxx looks this handler up in namespace rust::behavior.
+namespace rust::behavior {
+    template<typename Try, typename Fail>
+    static void trycatch(Try &&func, Fail &&fail) noexcept try {
+        func();
+    } catch (const huggingface::tgi::backends::llamacpp::llama_cpp_backend_exception_t &e) {
+        switch (e.error) {
+            case huggingface::tgi::backends::llamacpp::backend_error_t::MODEL_FILE_DOESNT_EXIST: {
+                fail("Specified model path doesn't exist.");
+                break;
+            }
+            case huggingface::tgi::backends::llamacpp::backend_error_t::NO_KV_SLOT_AVAILABLE: {
+                fail("Keys/Values cache is full, no slot available for the new batch.");
+                break;
+            }
+            case huggingface::tgi::backends::llamacpp::backend_error_t::DECODING_ERROR: {
+                fail("An error was detected during generation.");
+                break;
+            }
+        }
+    }
+}
+
+
 #endif //TGI_LLAMA_CPP_BACKEND_FFI_HPP
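
Note (illustrative, not part of the patch): nothing further is required on the Rust side for
this mapping to take effect. Below is a minimal sketch of the consuming bridge; the opaque type
name `llama_cpp_worker_frontend_t` and the `generate` signature are assumptions for illustration
(the real declarations live in backends/llamacpp/src/lib.rs). The mechanism itself is cxx's
documented one: any extern "C++" function declared as returning Result<T> is routed through the
rust::behavior::trycatch defined in ffi.hpp, so each fail("...") message surfaces in Rust as the
what() of a cxx::Exception.

    use std::pin::Pin;

    #[cxx::bridge(namespace = "huggingface::tgi::backends::llamacpp")]
    mod ffi {
        unsafe extern "C++" {
            include!("backends/llamacpp/csrc/ffi.hpp");

            // Assumed opaque type name for the frontend exposed by ffi.hpp.
            type llama_cpp_worker_frontend_t;

            // Declaring the return type as Result<T> is what makes cxx wrap
            // the C++ call in rust::behavior::trycatch.
            fn generate(
                self: Pin<&mut llama_cpp_worker_frontend_t>,
                tokens: &[u32],
            ) -> Result<usize>;
        }
    }

    fn drive(frontend: Pin<&mut ffi::llama_cpp_worker_frontend_t>, tokens: &[u32]) {
        match frontend.generate(tokens) {
            Ok(n_decoded) => println!("decoded {n_decoded} tokens"),
            // e.what() carries the string passed to fail(...) in the C++ switch, e.g.
            // "Keys/Values cache is full, no slot available for the new batch."
            Err(e) => eprintln!("llama.cpp backend error: {}", e.what()),
        }
    }

Carrying backend_error_t inside the exception keeps the whole error taxonomy in one switch at the
FFI boundary, instead of formatting message strings inside the generation loop itself.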