Add an argument "negative_prompt" (#549)
* Add an argument "negative_prompt" * Fix argument order * Fix to use TypeError instead of ValueError * Removed needless batch_size multiplying * Fix to multiply by batch_size * Add truncation=True for long negative prompt * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_onnx.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Update src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_onnx.py Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com> * Fix styles * Renamed ucond_tokens to uncond_tokens * Added description about "negative_prompt" Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
parent
7e92c5bc73
commit
5ac1f61cde
|
@ -116,6 +116,7 @@ class StableDiffusionPipeline(DiffusionPipeline):
|
||||||
width: int = 512,
|
width: int = 512,
|
||||||
num_inference_steps: int = 50,
|
num_inference_steps: int = 50,
|
||||||
guidance_scale: float = 7.5,
|
guidance_scale: float = 7.5,
|
||||||
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
||||||
eta: float = 0.0,
|
eta: float = 0.0,
|
||||||
generator: Optional[torch.Generator] = None,
|
generator: Optional[torch.Generator] = None,
|
||||||
latents: Optional[torch.FloatTensor] = None,
|
latents: Optional[torch.FloatTensor] = None,
|
||||||
|
@ -144,6 +145,9 @@ class StableDiffusionPipeline(DiffusionPipeline):
|
||||||
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
||||||
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
|
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
|
||||||
usually at the expense of lower image quality.
|
usually at the expense of lower image quality.
|
||||||
|
negative_prompt (`str` or `List[str]`, *optional*):
|
||||||
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
||||||
|
if `guidance_scale` is less than `1`).
|
||||||
eta (`float`, *optional*, defaults to 0.0):
|
eta (`float`, *optional*, defaults to 0.0):
|
||||||
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
||||||
[`schedulers.DDIMScheduler`], will be ignored for others.
|
[`schedulers.DDIMScheduler`], will be ignored for others.
|
||||||
|
@ -217,9 +221,32 @@ class StableDiffusionPipeline(DiffusionPipeline):
|
||||||
do_classifier_free_guidance = guidance_scale > 1.0
|
do_classifier_free_guidance = guidance_scale > 1.0
|
||||||
# get unconditional embeddings for classifier free guidance
|
# get unconditional embeddings for classifier free guidance
|
||||||
if do_classifier_free_guidance:
|
if do_classifier_free_guidance:
|
||||||
|
uncond_tokens: List[str]
|
||||||
|
if negative_prompt is None:
|
||||||
|
uncond_tokens = [""] * batch_size
|
||||||
|
elif type(prompt) is not type(negative_prompt):
|
||||||
|
raise TypeError(
|
||||||
|
f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
|
||||||
|
f" {type(prompt)}."
|
||||||
|
)
|
||||||
|
elif isinstance(negative_prompt, str):
|
||||||
|
uncond_tokens = [negative_prompt] * batch_size
|
||||||
|
elif batch_size != len(negative_prompt):
|
||||||
|
raise ValueError(
|
||||||
|
f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
|
||||||
|
f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
|
||||||
|
" the batch size of `prompt`."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
uncond_tokens = negative_prompt
|
||||||
|
|
||||||
max_length = text_input_ids.shape[-1]
|
max_length = text_input_ids.shape[-1]
|
||||||
uncond_input = self.tokenizer(
|
uncond_input = self.tokenizer(
|
||||||
[""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
|
uncond_tokens,
|
||||||
|
padding="max_length",
|
||||||
|
max_length=max_length,
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="pt",
|
||||||
)
|
)
|
||||||
uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
|
uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
|
||||||
|
|
||||||
|
|
|
@ -128,6 +128,7 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
||||||
strength: float = 0.8,
|
strength: float = 0.8,
|
||||||
num_inference_steps: Optional[int] = 50,
|
num_inference_steps: Optional[int] = 50,
|
||||||
guidance_scale: Optional[float] = 7.5,
|
guidance_scale: Optional[float] = 7.5,
|
||||||
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
||||||
eta: Optional[float] = 0.0,
|
eta: Optional[float] = 0.0,
|
||||||
generator: Optional[torch.Generator] = None,
|
generator: Optional[torch.Generator] = None,
|
||||||
output_type: Optional[str] = "pil",
|
output_type: Optional[str] = "pil",
|
||||||
|
@ -160,6 +161,9 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
||||||
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
||||||
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
|
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
|
||||||
usually at the expense of lower image quality.
|
usually at the expense of lower image quality.
|
||||||
|
negative_prompt (`str` or `List[str]`, *optional*):
|
||||||
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
||||||
|
if `guidance_scale` is less than `1`).
|
||||||
eta (`float`, *optional*, defaults to 0.0):
|
eta (`float`, *optional*, defaults to 0.0):
|
||||||
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
||||||
[`schedulers.DDIMScheduler`], will be ignored for others.
|
[`schedulers.DDIMScheduler`], will be ignored for others.
|
||||||
|
@ -258,9 +262,28 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
||||||
do_classifier_free_guidance = guidance_scale > 1.0
|
do_classifier_free_guidance = guidance_scale > 1.0
|
||||||
# get unconditional embeddings for classifier free guidance
|
# get unconditional embeddings for classifier free guidance
|
||||||
if do_classifier_free_guidance:
|
if do_classifier_free_guidance:
|
||||||
|
uncond_tokens: List[str]
|
||||||
|
if negative_prompt is None:
|
||||||
|
uncond_tokens = [""] * batch_size
|
||||||
|
elif type(prompt) is not type(negative_prompt):
|
||||||
|
raise TypeError(
|
||||||
|
f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
|
||||||
|
f" {type(prompt)}."
|
||||||
|
)
|
||||||
|
elif isinstance(negative_prompt, str):
|
||||||
|
uncond_tokens = [negative_prompt] * batch_size
|
||||||
|
elif batch_size != len(negative_prompt):
|
||||||
|
raise ValueError("The length of `negative_prompt` should be equal to batch_size.")
|
||||||
|
else:
|
||||||
|
uncond_tokens = negative_prompt
|
||||||
|
|
||||||
max_length = text_input_ids.shape[-1]
|
max_length = text_input_ids.shape[-1]
|
||||||
uncond_input = self.tokenizer(
|
uncond_input = self.tokenizer(
|
||||||
[""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
|
uncond_tokens,
|
||||||
|
padding="max_length",
|
||||||
|
max_length=max_length,
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="pt",
|
||||||
)
|
)
|
||||||
uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
|
uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
|
||||||
|
|
||||||
|
|
|
@ -144,6 +144,7 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
|
||||||
strength: float = 0.8,
|
strength: float = 0.8,
|
||||||
num_inference_steps: Optional[int] = 50,
|
num_inference_steps: Optional[int] = 50,
|
||||||
guidance_scale: Optional[float] = 7.5,
|
guidance_scale: Optional[float] = 7.5,
|
||||||
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
||||||
eta: Optional[float] = 0.0,
|
eta: Optional[float] = 0.0,
|
||||||
generator: Optional[torch.Generator] = None,
|
generator: Optional[torch.Generator] = None,
|
||||||
output_type: Optional[str] = "pil",
|
output_type: Optional[str] = "pil",
|
||||||
|
@ -180,6 +181,9 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
|
||||||
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
||||||
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
|
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
|
||||||
usually at the expense of lower image quality.
|
usually at the expense of lower image quality.
|
||||||
|
negative_prompt (`str` or `List[str]`, *optional*):
|
||||||
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
||||||
|
if `guidance_scale` is less than `1`).
|
||||||
eta (`float`, *optional*, defaults to 0.0):
|
eta (`float`, *optional*, defaults to 0.0):
|
||||||
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
||||||
[`schedulers.DDIMScheduler`], will be ignored for others.
|
[`schedulers.DDIMScheduler`], will be ignored for others.
|
||||||
|
@ -292,9 +296,32 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
|
||||||
do_classifier_free_guidance = guidance_scale > 1.0
|
do_classifier_free_guidance = guidance_scale > 1.0
|
||||||
# get unconditional embeddings for classifier free guidance
|
# get unconditional embeddings for classifier free guidance
|
||||||
if do_classifier_free_guidance:
|
if do_classifier_free_guidance:
|
||||||
|
uncond_tokens: List[str]
|
||||||
|
if negative_prompt is None:
|
||||||
|
uncond_tokens = [""] * batch_size
|
||||||
|
elif type(prompt) is not type(negative_prompt):
|
||||||
|
raise TypeError(
|
||||||
|
f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
|
||||||
|
f" {type(prompt)}."
|
||||||
|
)
|
||||||
|
elif isinstance(negative_prompt, str):
|
||||||
|
uncond_tokens = [negative_prompt] * batch_size
|
||||||
|
elif batch_size != len(negative_prompt):
|
||||||
|
raise ValueError(
|
||||||
|
f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
|
||||||
|
f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
|
||||||
|
" the batch size of `prompt`."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
uncond_tokens = negative_prompt
|
||||||
|
|
||||||
max_length = text_input_ids.shape[-1]
|
max_length = text_input_ids.shape[-1]
|
||||||
uncond_input = self.tokenizer(
|
uncond_input = self.tokenizer(
|
||||||
[""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
|
uncond_tokens,
|
||||||
|
padding="max_length",
|
||||||
|
max_length=max_length,
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="pt",
|
||||||
)
|
)
|
||||||
uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
|
uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,7 @@ class StableDiffusionOnnxPipeline(DiffusionPipeline):
|
||||||
width: Optional[int] = 512,
|
width: Optional[int] = 512,
|
||||||
num_inference_steps: Optional[int] = 50,
|
num_inference_steps: Optional[int] = 50,
|
||||||
guidance_scale: Optional[float] = 7.5,
|
guidance_scale: Optional[float] = 7.5,
|
||||||
|
negative_prompt: Optional[Union[str, List[str]]] = None,
|
||||||
eta: Optional[float] = 0.0,
|
eta: Optional[float] = 0.0,
|
||||||
latents: Optional[np.ndarray] = None,
|
latents: Optional[np.ndarray] = None,
|
||||||
output_type: Optional[str] = "pil",
|
output_type: Optional[str] = "pil",
|
||||||
|
@ -102,9 +103,32 @@ class StableDiffusionOnnxPipeline(DiffusionPipeline):
|
||||||
do_classifier_free_guidance = guidance_scale > 1.0
|
do_classifier_free_guidance = guidance_scale > 1.0
|
||||||
# get unconditional embeddings for classifier free guidance
|
# get unconditional embeddings for classifier free guidance
|
||||||
if do_classifier_free_guidance:
|
if do_classifier_free_guidance:
|
||||||
|
uncond_tokens: List[str]
|
||||||
|
if negative_prompt is None:
|
||||||
|
uncond_tokens = [""] * batch_size
|
||||||
|
elif type(prompt) is not type(negative_prompt):
|
||||||
|
raise TypeError(
|
||||||
|
f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
|
||||||
|
f" {type(prompt)}."
|
||||||
|
)
|
||||||
|
elif isinstance(negative_prompt, str):
|
||||||
|
uncond_tokens = [negative_prompt] * batch_size
|
||||||
|
elif batch_size != len(negative_prompt):
|
||||||
|
raise ValueError(
|
||||||
|
f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
|
||||||
|
f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
|
||||||
|
" the batch size of `prompt`."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
uncond_tokens = negative_prompt
|
||||||
|
|
||||||
max_length = text_input_ids.shape[-1]
|
max_length = text_input_ids.shape[-1]
|
||||||
uncond_input = self.tokenizer(
|
uncond_input = self.tokenizer(
|
||||||
[""] * batch_size, padding="max_length", max_length=max_length, return_tensors="np"
|
uncond_tokens,
|
||||||
|
padding="max_length",
|
||||||
|
max_length=max_length,
|
||||||
|
truncation=True,
|
||||||
|
return_tensors="np",
|
||||||
)
|
)
|
||||||
uncond_embeddings = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0]
|
uncond_embeddings = self.text_encoder(input_ids=uncond_input.input_ids.astype(np.int32))[0]
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue