diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py
index cd10edc8d..6e4c5d10d 100644
--- a/modules/models/sd3/other_impls.py
+++ b/modules/models/sd3/other_impls.py
@@ -262,8 +262,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
 
     def forward(self, tokens):
         backup_embeds = self.transformer.get_input_embeddings()
-        device = backup_embeds.weight.device
-        tokens = torch.LongTensor(tokens).to(device)
+        tokens = torch.asarray(tokens, dtype=torch.int64, device=backup_embeds.weight.device)
         outputs = self.transformer(tokens, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state)
         self.transformer.set_input_embeddings(backup_embeds)
         if self.layer == "last":
diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py
index d60b04e4e..bb3e6a3d0 100644
--- a/modules/models/sd3/sd3_model.py
+++ b/modules/models/sd3/sd3_model.py
@@ -149,7 +149,8 @@ class SD3Inferencer(torch.nn.Module):
         return contextlib.nullcontext()
 
     def get_learned_conditioning(self, batch: list[str]):
-        return self.cond_stage_model(batch)
+        with devices.without_autocast():
+            return self.cond_stage_model(batch)
 
     def apply_model(self, x, t, cond):
         return self.model.apply_model(x, t, c_crossattn=cond['crossattn'], y=cond['vector'])