From b2d3948ccfdda9bbc26fe21b93d376791fd162f2 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 14 Aug 2024 11:14:11 +0200 Subject: [PATCH] Fix idefics. --- server/text_generation_server/models/idefics.py | 1 + server/text_generation_server/models/idefics_causal_lm.py | 1 + 2 files changed, 2 insertions(+) diff --git a/server/text_generation_server/models/idefics.py b/server/text_generation_server/models/idefics.py index 29929b98..9058cb96 100644 --- a/server/text_generation_server/models/idefics.py +++ b/server/text_generation_server/models/idefics.py @@ -33,6 +33,7 @@ class IDEFICSSharded(IdeficsCausalLM): dtype: Optional[torch.dtype] = None, trust_remote_code: bool = False, ): + self.quantize = quantize self.process_group, rank, world_size = initialize_torch_distributed() if torch.cuda.is_available(): device = torch.device(f"cuda:{rank}") diff --git a/server/text_generation_server/models/idefics_causal_lm.py b/server/text_generation_server/models/idefics_causal_lm.py index 8a80ed68..c5480952 100644 --- a/server/text_generation_server/models/idefics_causal_lm.py +++ b/server/text_generation_server/models/idefics_causal_lm.py @@ -580,6 +580,7 @@ class IdeficsCausalLM(Model): dtype: Optional[torch.dtype] = None, trust_remote_code: bool = False, ): + self.quantize = quantize from text_generation_server.models.custom_modeling.idefics_modeling import ( IdeficsForVisionText2Text, )