From 64e981fdcf08c1750b75593777aa50d65bfe6a6f Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 24 Sep 2024 10:53:19 +0000 Subject: [PATCH] fix sliding window models on ROCm: make clamp() a no-op instead of raising NotImplementedError --- server/text_generation_server/layers/attention/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/text_generation_server/layers/attention/common.py b/server/text_generation_server/layers/attention/common.py index 855f4dfc..d6e512c0 100644 --- a/server/text_generation_server/layers/attention/common.py +++ b/server/text_generation_server/layers/attention/common.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from text_generation_server.utils.import_utils import SYSTEM from text_generation_server.models.globals import ATTENTION import torch from typing import Optional @@ -65,5 +66,7 @@ else: max_k: int def clamp(self, max): + if SYSTEM == "rocm": + return self raise NotImplementedError("Not implemented seqlen for paged") return Seqlen(torch.clamp(self.input_lengths, max=max))