enable HuggingFaceM4/idefics-9b in intel gpu (#2338)

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
2024-08-01 17:08:36 +08:00 · 2024-08-01 17:08:36 +08:00 · 9ab9937414
parent 7451041ecd
commit 9ab9937414
2 changed files with 23 additions and 1 deletions
--- a/server/text_generation_server/models/custom_modeling/idefics_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/idefics_modeling.py
@ -351,7 +351,19 @@ class IdeficsRMSNorm(nn.Module):
        self.variance_epsilon = eps

    def forward(self, hidden_states, residual=None):
-        if hidden_states.shape[-1] > 8192:
+        if SYSTEM == "ipex":
+            import intel_extension_for_pytorch as ipex
+
+            out = ipex.llm.functional.add_rms_norm(
+                residual,
+                hidden_states,
+                self.weight,
+                None,
+                self.variance_epsilon,
+                residual is not None,
+            )
+            return out
+        elif hidden_states.shape[-1] > 8192:
            if residual is not None:
                hidden_states += residual
            residual = hidden_states
--- a/server/text_generation_server/models/idefics.py
+++ b/server/text_generation_server/models/idefics.py
@ -20,6 +20,8 @@ from text_generation_server.utils import (
 )
 from text_generation_server.utils.quantization import get_loader

+from text_generation_server.utils.import_utils import SYSTEM
+

 class IDEFICSSharded(IdeficsCausalLM):
    def __init__(
@ -37,6 +39,14 @@ class IDEFICSSharded(IdeficsCausalLM):
            # 9b seems to work correctly enough in float16, but 80b seems
            # to be really saturating for f16.
            dtype = torch.float16 if dtype is None else dtype
+        elif SYSTEM == "ipex":
+            if hasattr(torch, "xpu") and torch.xpu.is_available():
+                device = torch.device(f"xpu:{rank}")
+                dtype = torch.float16 if dtype is None else dtype
+            else:
+                device = torch.device("cpu")
+                # Float16 doesn't exist on target.
+                dtype = torch.bfloat16 if dtype is None else dtype
        else:
            device = torch.device("cpu")
            dtype = torch.float32 if dtype is None else dtype