diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 22bdda3b..8e5c9dcd 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -104,6 +104,9 @@ fn resolve_attention(config: &Option<Config>, lora_adapters: &Option<String>) ->
                     tracing::info!("Forcing flash decoding because head dim is not supported by flashinfer, also disabling prefix caching");
                     attention = Some("flashdecoding".to_string());
                 }
+                if prefix_caching.is_none() {
+                    prefix_caching = Some("0".to_string());
+                }
             }
         }
     }
diff --git a/router/src/server.rs b/router/src/server.rs
index f273a786..6b7fff81 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1748,7 +1748,7 @@ pub async fn run(
         let mut tokenizer = Tokenizer::from_file(filename).ok();
         if let Some(tokenizer) = &mut tokenizer {
             if let Some(class) = &tokenizer_config.tokenizer_class {
-                if class == "LlamaTokenizer" || class == "LlamaTokenizerFast"{
+                if class == "LlamaTokenizer" || class == "LlamaTokenizerFast" || class == "CohereTokenizerFast"{
                     if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) {
                         tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205");
                         tokenizer.with_post_processor(post_processor);
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 4fa9e66d..e03cc30d 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -497,15 +497,14 @@ def get_model(
             else -1
         )
 
-        should_use_sliding_window = (
-            sliding_window is not None and sliding_window != -1 and SUPPORTS_WINDOWING
+        use_sliding_window = sliding_window is not None and sliding_window != -1
+        needs_sliding_window = (
+            max_input_tokens is not None and max_input_tokens > sliding_window
         )
-
-        if should_use_sliding_window:
-            if max_input_tokens is not None and max_input_tokens > sliding_window:
-                raise ValueError(
-                    f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
-                )
+        if use_sliding_window and needs_sliding_window and not SUPPORTS_WINDOWING:
+            raise ValueError(
+                f"The backend {SYSTEM} does not support sliding window attention that is used by the model type {model_type}. To use this model nonetheless with the {SYSTEM} backend, please launch TGI with the argument `--max-input-tokens` smaller than sliding_window={sliding_window} (got here max_input_tokens={max_input_tokens})."
+            )
 
     if model_type == DEEPSEEK_V2:
         if FLASH_ATTENTION:
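
Note on the last hunk: the refactor splits the old combined condition into two flags and only raises when a sliding window is both defined by the model config and actually exceeded by the requested input length on a backend that cannot honor it. Below is a minimal standalone sketch of that guard, not the actual get_model() code path; the values of SYSTEM and SUPPORTS_WINDOWING and the wrapper name check_sliding_window are hypothetical placeholders for illustration.

# Sketch only: SYSTEM and SUPPORTS_WINDOWING stand in for the backend name and
# capability flag that TGI resolves elsewhere; check_sliding_window is a
# hypothetical helper, not part of the repository.
SYSTEM = "example-backend"
SUPPORTS_WINDOWING = False


def check_sliding_window(sliding_window: int, max_input_tokens: int, model_type: str) -> None:
    # sliding_window == -1 means the model config defines no window (see the
    # `else -1` fallback in the hunk's context lines).
    use_sliding_window = sliding_window is not None and sliding_window != -1
    needs_sliding_window = (
        max_input_tokens is not None and max_input_tokens > sliding_window
    )
    # Fail only when the window is defined, the request is long enough to need
    # it, and the backend cannot provide windowed attention.
    if use_sliding_window and needs_sliding_window and not SUPPORTS_WINDOWING:
        raise ValueError(
            f"The backend {SYSTEM} does not support sliding window attention "
            f"used by model type {model_type}; launch with --max-input-tokens "
            f"smaller than sliding_window={sliding_window} "
            f"(got max_input_tokens={max_input_tokens})."
        )


# Example: a 4096-token window with a 2048-token limit passes, because the
# request never exceeds the window even though the backend lacks windowing.
check_sliding_window(sliding_window=4096, max_input_tokens=2048, model_type="mistral")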