Merge c07f54aac2 into 88702d8763

2024-04-15 16:31:52 -04:00 · 2024-04-15 16:31:52 -04:00 · 9b8a7efb4b
parent 88702d8763 c07f54aac2
commit 9b8a7efb4b
1 changed file with 3 additions and 1 deletion
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -212,6 +212,8 @@ class Fp8Linear(nn.Module):
        self.bias = bias if bias is not None else None

    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        if (bsz := input.shape[0]) & 15:
+            input = F.pad(input,(0, 0, 0, 16 - (bsz & 15)))
        qinput, scale = fp8_quantize(input)
        output, _ = torch._scaled_mm(
            qinput,
@@ -221,7 +223,7 @@ class Fp8Linear(nn.Module):
            scale_b=self.scale,
            bias=self.bias,
        )
-        return output
+        return output[:bsz]


 class Linear8bitLt(nn.Module):