fix(server): Fixing RW code (it's remote code so the Arch checking doesn't work to see which weights to keep). (#579)
Fixes #555
This commit is contained in:
parent b4024edd45
commit f0181436f4
@@ -49,7 +49,13 @@ class FlashRWSharded(FlashCausalLM):
         torch.distributed.barrier(group=self.process_group)
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
-        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        weights = Weights(
+            filenames,
+            device,
+            dtype,
+            process_group=self.process_group,
+            aliases={"transformer.word_embeddings.weight": ["lm_head.weight"]},
+        )
 
         config.quantize = quantize
 
 
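The added aliases argument is the substance of the fix: RW (Falcon) checkpoints load through remote code, so the loader cannot inspect the architecture to learn that the LM head is tied to the word embeddings, and lm_head.weight is simply absent from the safetensors files. Mapping it onto transformer.word_embeddings.weight lets a lookup for either name resolve to the one tensor that is stored. Below is a minimal sketch of that resolution logic under the assumption of a dict-like tensor store; the AliasedWeights class and its internals are illustrative, not the actual Weights implementation in text_generation_server.

from typing import Dict, List


class AliasedWeights:
    """Illustrative alias-aware tensor lookup (not the real Weights class)."""

    def __init__(self, tensors: Dict[str, object], aliases: Dict[str, List[str]]):
        self.tensors = tensors
        # Invert the mapping: every alias points back to the canonical
        # name that actually exists in the checkpoint.
        self.alias_to_canonical = {
            alias: canonical
            for canonical, alias_list in aliases.items()
            for alias in alias_list
        }

    def get_tensor(self, name: str) -> object:
        # Tied weights are stored once, so when the requested name is
        # missing, fall back to its canonical name before indexing.
        if name not in self.tensors:
            name = self.alias_to_canonical.get(name, name)
        return self.tensors[name]


# RW/Falcon ties lm_head.weight to the embedding matrix, so only the
# embedding tensor is present on disk.
weights = AliasedWeights(
    tensors={"transformer.word_embeddings.weight": "embedding-tensor"},
    aliases={"transformer.word_embeddings.weight": ["lm_head.weight"]},
)
assert weights.get_tensor("lm_head.weight") == "embedding-tensor"

Declaring the alias at construction time keeps call sites unchanged: model code can still request lm_head.weight without knowing the checkpoint stores it under the embedding name.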