diff --git a/server/text_generation_server/models/bloom.py b/server/text_generation_server/models/bloom.py
index 65c9f317..156b660f 100644
--- a/server/text_generation_server/models/bloom.py
+++ b/server/text_generation_server/models/bloom.py
@@ -90,6 +90,7 @@ class BLOOMSharded(CausalLM):
 
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
index e896c831..10c64c66 100644
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -538,6 +538,7 @@ class CausalLM(Model):
                 tokenizer.add_special_tokens({"pad_token": "[PAD]"})
 
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/galactica.py b/server/text_generation_server/models/galactica.py
index d0f2b915..e74d2cbf 100644
--- a/server/text_generation_server/models/galactica.py
+++ b/server/text_generation_server/models/galactica.py
@@ -212,6 +212,7 @@ class GalacticaSharded(CausalLM):
 
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/gpt_neox.py b/server/text_generation_server/models/gpt_neox.py
index c0e1adf2..0131bffc 100644
--- a/server/text_generation_server/models/gpt_neox.py
+++ b/server/text_generation_server/models/gpt_neox.py
@@ -65,6 +65,7 @@ class GPTNeoxSharded(CausalLM):
 
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/mpt.py b/server/text_generation_server/models/mpt.py
index 8d8b4909..c58b387e 100644
--- a/server/text_generation_server/models/mpt.py
+++ b/server/text_generation_server/models/mpt.py
@@ -90,6 +90,7 @@ class MPTSharded(CausalLM):
 
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=False,
diff --git a/server/text_generation_server/models/opt.py b/server/text_generation_server/models/opt.py
index 5b84f4ff..aaf81f21 100644
--- a/server/text_generation_server/models/opt.py
+++ b/server/text_generation_server/models/opt.py
@@ -63,6 +63,7 @@ class OPTSharded(CausalLM):
 
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/phi.py b/server/text_generation_server/models/phi.py
index d68866c1..93d42b2b 100644
--- a/server/text_generation_server/models/phi.py
+++ b/server/text_generation_server/models/phi.py
@@ -60,6 +60,7 @@ class Phi(CausalLM):
         model = PhiForCausalLM(config, weights)
         torch.distributed.barrier(group=self.process_group)
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/rw.py b/server/text_generation_server/models/rw.py
index d4764ded..bc2b6db2 100644
--- a/server/text_generation_server/models/rw.py
+++ b/server/text_generation_server/models/rw.py
@@ -62,6 +62,7 @@ class RW(CausalLM):
             tokenizer.add_special_tokens({"pad_token": "[PAD]"})
 
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/santacoder.py b/server/text_generation_server/models/santacoder.py
index 323e4324..caddbe19 100644
--- a/server/text_generation_server/models/santacoder.py
+++ b/server/text_generation_server/models/santacoder.py
@@ -62,6 +62,7 @@ class SantaCoder(CausalLM):
         )
 
         super(CausalLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/seq2seq_lm.py b/server/text_generation_server/models/seq2seq_lm.py
index 3bd09556..d454d804 100644
--- a/server/text_generation_server/models/seq2seq_lm.py
+++ b/server/text_generation_server/models/seq2seq_lm.py
@@ -575,6 +575,7 @@ class Seq2SeqLM(Model):
         tokenizer.bos_token_id = model.config.decoder_start_token_id
 
         super(Seq2SeqLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
diff --git a/server/text_generation_server/models/t5.py b/server/text_generation_server/models/t5.py
index 8e0735e5..adef664c 100644
--- a/server/text_generation_server/models/t5.py
+++ b/server/text_generation_server/models/t5.py
@@ -73,6 +73,7 @@ class T5Sharded(Seq2SeqLM):
 
         torch.distributed.barrier(group=self.process_group)
         super(Seq2SeqLM, self).__init__(
+            model_id=model_id,
             model=model,
             tokenizer=tokenizer,
             requires_padding=True,
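
Note: every hunk above touches only call sites, forwarding model_id up from each subclass constructor. The diff does not show the matching change to the shared base class in server/text_generation_server/models/model.py, so presumably Model.__init__ gained a model_id parameter in the same commit. A minimal sketch of that assumed counterpart follows; the parameter order, the stored attribute, and its eventual use are assumptions, not code from this patch.

# Sketch only: assumed counterpart in
# server/text_generation_server/models/model.py, NOT part of this diff.
from abc import ABC

import torch
from transformers import PreTrainedTokenizerBase


class Model(ABC):
    def __init__(
        self,
        model_id: str,  # new: forwarded by every subclass changed above
        model: torch.nn.Module,
        tokenizer: PreTrainedTokenizerBase,
        requires_padding: bool,
        dtype: torch.dtype,
        device: torch.device,
    ):
        # Assumption: the id is stored so the serving layer can report
        # which model is loaded; the real base class may take more
        # parameters than shown here.
        self.model_id = model_id
        self.model = model
        self.tokenizer = tokenizer
        self.requires_padding = requires_padding
        self.dtype = dtype
        self.device = device

With a signature of that shape in place, each super(...).__init__(model_id=model_id, ...) call in the hunks above binds the new keyword without any other change to the subclasses.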