Fixing cohere tokenizer. (#1697)
parent 5062fda4ff
commit f9958ee191
@@ -3,7 +3,7 @@ import torch.distributed
 
 from opentelemetry import trace
 from typing import Optional
-from transformers.models.llama import LlamaTokenizerFast
+from transformers import AutoTokenizer
 
 from text_generation_server.models import FlashCausalLM
 from text_generation_server.models.custom_modeling.flash_cohere_modeling import (
@@ -36,7 +36,7 @@ class FlashCohere(FlashCausalLM):
         else:
             raise NotImplementedError("FlashCohere is only available on GPU")
 
-        tokenizer = LlamaTokenizerFast.from_pretrained(
+        tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,
             padding_side="left",
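For context, a minimal sketch of what the swap buys (the checkpoint id below is the public Command-R repo, used only for illustration; the exact tokenizer class reported depends on the installed transformers version): AutoTokenizer instantiates whatever tokenizer class the checkpoint's tokenizer_config.json declares, whereas the old code forced LlamaTokenizerFast onto a non-Llama checkpoint.

from transformers import AutoTokenizer

# Public Cohere Command-R checkpoint, used here only for illustration.
model_id = "CohereForAI/c4ai-command-r-v01"

# AutoTokenizer dispatches on the checkpoint's tokenizer_config.json, so the
# tokenizer class the model was actually trained with is loaded, along with
# its own special tokens and vocabulary.
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    padding_side="left",  # same setting FlashCohere passes above
)

# A hard-coded LlamaTokenizerFast would report a Llama class even on a
# Cohere checkpoint; via AutoTokenizer the checkpoint's own class is used.
print(type(tokenizer).__name__)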