From f9958ee191057cadf0c6c1f41577eae86819812c Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Fri, 5 Apr 2024 16:44:19 +0200
Subject: [PATCH] Fixing cohere tokenizer. (#1697)

---
 server/text_generation_server/models/flash_cohere.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/text_generation_server/models/flash_cohere.py b/server/text_generation_server/models/flash_cohere.py
index 33b053a6..181a93b1 100644
--- a/server/text_generation_server/models/flash_cohere.py
+++ b/server/text_generation_server/models/flash_cohere.py
@@ -3,7 +3,7 @@
 import torch.distributed
 from opentelemetry import trace
 from typing import Optional
-from transformers.models.llama import LlamaTokenizerFast
+from transformers import AutoTokenizer
 
 from text_generation_server.models import FlashCausalLM
 from text_generation_server.models.custom_modeling.flash_cohere_modeling import (
@@ -36,7 +36,7 @@ class FlashCohere(FlashCausalLM):
         else:
             raise NotImplementedError("FlashCohere is only available on GPU")
 
-        tokenizer = LlamaTokenizerFast.from_pretrained(
+        tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,
             padding_side="left",
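
For context, a minimal sketch (not part of the patch itself) of the behavior
the change relies on: AutoTokenizer resolves the tokenizer class declared in
the checkpoint's tokenizer_config.json, rather than forcing LlamaTokenizerFast
onto a Cohere checkpoint. The model id below is purely illustrative:

    from transformers import AutoTokenizer

    # AutoTokenizer inspects the checkpoint metadata and instantiates the
    # tokenizer class the model actually ships with, instead of assuming a
    # Llama tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(
        "CohereForAI/c4ai-command-r-v01",  # illustrative Cohere model id
        padding_side="left",
    )
    print(type(tokenizer).__name__)  # the Cohere-specific fast tokenizer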