diff --git a/modules/models/sd3/other_impls.py b/modules/models/sd3/other_impls.py
index cd10edc8d..6e4c5d10d 100644
--- a/modules/models/sd3/other_impls.py
+++ b/modules/models/sd3/other_impls.py
@@ -262,8 +262,7 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
 
     def forward(self, tokens):
         backup_embeds = self.transformer.get_input_embeddings()
-        device = backup_embeds.weight.device
-        tokens = torch.LongTensor(tokens).to(device)
+        tokens = torch.asarray(tokens, dtype=torch.int64, device=backup_embeds.weight.device)
         outputs = self.transformer(tokens, intermediate_output=self.layer_idx, final_layer_norm_intermediate=self.layer_norm_hidden_state)
         self.transformer.set_input_embeddings(backup_embeds)
         if self.layer == "last":
diff --git a/modules/models/sd3/sd3_model.py b/modules/models/sd3/sd3_model.py
index d60b04e4e..bb3e6a3d0 100644
--- a/modules/models/sd3/sd3_model.py
+++ b/modules/models/sd3/sd3_model.py
@@ -149,7 +149,8 @@ class SD3Inferencer(torch.nn.Module):
         return contextlib.nullcontext()
 
     def get_learned_conditioning(self, batch: list[str]):
-        return self.cond_stage_model(batch)
+        with devices.without_autocast():
+            return self.cond_stage_model(batch)
 
     def apply_model(self, x, t, cond):
         return self.model.apply_model(x, t, c_crossattn=cond['crossattn'], y=cond['vector'])