more fixes
This commit is contained in:
parent
e45dae7dc0
commit
48269070d2
|
@ -249,24 +249,24 @@ image_pil = PIL.Image.fromarray(image_processed[0])
|
||||||
image_pil.save("test.png")
|
image_pil.save("test.png")
|
||||||
```
|
```
|
||||||
|
|
||||||
#### **Text to speech with GradTTS and BDDM**
|
#### **Text to speech with GradTTS and BDDMPipeline**
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import torch
|
import torch
|
||||||
from diffusers import BDDM, GradTTS
|
from diffusers import BDDMPipeline, GradTTSPipeline
|
||||||
|
|
||||||
torch_device = "cuda"
|
torch_device = "cuda"
|
||||||
|
|
||||||
# load grad tts and bddm pipelines
|
# load grad tts and bddm pipelines
|
||||||
grad_tts = GradTTS.from_pretrained("fusing/grad-tts-libri-tts")
|
grad_tts = GradTTSPipeline.from_pretrained("fusing/grad-tts-libri-tts")
|
||||||
bddm = BDDM.from_pretrained("fusing/diffwave-vocoder-ljspeech")
|
bddm = BDDMPipeline.from_pretrained("fusing/diffwave-vocoder-ljspeech")
|
||||||
|
|
||||||
text = "Hello world, I missed you so much."
|
text = "Hello world, I missed you so much."
|
||||||
|
|
||||||
# generate mel spectrograms using text
|
# generate mel spectrograms using text
|
||||||
mel_spec = grad_tts(text, torch_device=torch_device)
|
mel_spec = grad_tts(text, torch_device=torch_device)
|
||||||
|
|
||||||
# generate the speech by passing mel spectograms to BDDM pipeline
|
# generate the speech by passing mel spectrograms to the BDDMPipeline
|
||||||
generator = torch.manual_seed(42)
|
generator = torch.manual_seed(42)
|
||||||
audio = bddm(mel_spec, generator, torch_device=torch_device)
|
audio = bddm(mel_spec, generator, torch_device=torch_device)
|
||||||
|
|
||||||
|
|
|
@ -11,19 +11,19 @@ from .models.unet import UNetModel
|
||||||
from .models.unet_ldm import UNetLDMModel
|
from .models.unet_ldm import UNetLDMModel
|
||||||
from .models.unet_rl import TemporalUNet
|
from .models.unet_rl import TemporalUNet
|
||||||
from .pipeline_utils import DiffusionPipeline
|
from .pipeline_utils import DiffusionPipeline
|
||||||
from .pipelines import BDDM, DDIM, DDPM, PNDM
|
from .pipelines import BDDMPipeline, DDIMPipeline, DDPMPipeline, PNDMPipeline
|
||||||
from .schedulers import DDIMScheduler, DDPMScheduler, GradTTSScheduler, PNDMScheduler, SchedulerMixin
|
from .schedulers import DDIMScheduler, DDPMScheduler, GradTTSScheduler, PNDMScheduler, SchedulerMixin
|
||||||
|
|
||||||
|
|
||||||
if is_transformers_available():
|
if is_transformers_available():
|
||||||
from .models.unet_glide import GlideSuperResUNetModel, GlideTextToImageUNetModel, GlideUNetModel
|
from .models.unet_glide import GlideSuperResUNetModel, GlideTextToImageUNetModel, GlideUNetModel
|
||||||
from .models.unet_grad_tts import UNetGradTTSModel
|
from .models.unet_grad_tts import UNetGradTTSModel
|
||||||
from .pipelines import Glide, LatentDiffusion
|
from .pipelines import GlidePipeline, LatentDiffusionPipeline
|
||||||
else:
|
else:
|
||||||
from .utils.dummy_transformers_objects import *
|
from .utils.dummy_transformers_objects import *
|
||||||
|
|
||||||
|
|
||||||
if is_transformers_available() and is_inflect_available() and is_unidecode_available():
|
if is_transformers_available() and is_inflect_available() and is_unidecode_available():
|
||||||
from .pipelines import GradTTS
|
from .pipelines import GradTTSPipeline
|
||||||
else:
|
else:
|
||||||
from .utils.dummy_transformers_and_inflect_and_unidecode_objects import *
|
from .utils.dummy_transformers_and_inflect_and_unidecode_objects import *
|
||||||
|
|
|
@ -21,7 +21,6 @@ from typing import Optional, Union
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
|
|
||||||
from .configuration_utils import ConfigMixin
|
from .configuration_utils import ConfigMixin
|
||||||
from .dynamic_modules_utils import get_class_from_dynamic_module
|
|
||||||
from .utils import DIFFUSERS_CACHE, logging
|
from .utils import DIFFUSERS_CACHE, logging
|
||||||
|
|
||||||
|
|
||||||
|
@ -81,9 +80,6 @@ class DiffusionPipeline(ConfigMixin):
|
||||||
# set models
|
# set models
|
||||||
setattr(self, name, module)
|
setattr(self, name, module)
|
||||||
|
|
||||||
register_dict = {"_module": self.__module__.split(".")[-1]}
|
|
||||||
self.register_to_config(**register_dict)
|
|
||||||
|
|
||||||
def save_pretrained(self, save_directory: Union[str, os.PathLike]):
|
def save_pretrained(self, save_directory: Union[str, os.PathLike]):
|
||||||
self.save_config(save_directory)
|
self.save_config(save_directory)
|
||||||
|
|
||||||
|
@ -139,11 +135,7 @@ class DiffusionPipeline(ConfigMixin):
|
||||||
|
|
||||||
config_dict = cls.get_config_dict(cached_folder)
|
config_dict = cls.get_config_dict(cached_folder)
|
||||||
|
|
||||||
# 2. Get class name and module candidates to load custom models
|
# 2. Load the pipeline class, if using custom module then load it from the hub
|
||||||
module_candidate_name = config_dict["_module"]
|
|
||||||
module_candidate = module_candidate_name + ".py"
|
|
||||||
|
|
||||||
# 3. Load the pipeline class, if using custom module then load it from the hub
|
|
||||||
# if we load from explicit class, let's use it
|
# if we load from explicit class, let's use it
|
||||||
if cls != DiffusionPipeline:
|
if cls != DiffusionPipeline:
|
||||||
pipeline_class = cls
|
pipeline_class = cls
|
||||||
|
@ -151,11 +143,6 @@ class DiffusionPipeline(ConfigMixin):
|
||||||
diffusers_module = importlib.import_module(cls.__module__.split(".")[0])
|
diffusers_module = importlib.import_module(cls.__module__.split(".")[0])
|
||||||
pipeline_class = getattr(diffusers_module, config_dict["_class_name"])
|
pipeline_class = getattr(diffusers_module, config_dict["_class_name"])
|
||||||
|
|
||||||
# (TODO - we should allow to load custom pipelines
|
|
||||||
# else we need to load the correct module from the Hub
|
|
||||||
# module = module_candidate
|
|
||||||
# pipeline_class = get_class_from_dynamic_module(cached_folder, module, class_name_, cached_folder)
|
|
||||||
|
|
||||||
init_dict, _ = pipeline_class.extract_init_dict(config_dict, **kwargs)
|
init_dict, _ = pipeline_class.extract_init_dict(config_dict, **kwargs)
|
||||||
|
|
||||||
init_kwargs = {}
|
init_kwargs = {}
|
||||||
|
@ -163,7 +150,7 @@ class DiffusionPipeline(ConfigMixin):
|
||||||
# import it here to avoid circular import
|
# import it here to avoid circular import
|
||||||
from diffusers import pipelines
|
from diffusers import pipelines
|
||||||
|
|
||||||
# 4. Load each module in the pipeline
|
# 3. Load each module in the pipeline
|
||||||
for name, (library_name, class_name) in init_dict.items():
|
for name, (library_name, class_name) in init_dict.items():
|
||||||
is_pipeline_module = hasattr(pipelines, library_name)
|
is_pipeline_module = hasattr(pipelines, library_name)
|
||||||
# if the model is in a pipeline module, then we load it from the pipeline
|
# if the model is in a pipeline module, then we load it from the pipeline
|
||||||
|
@ -171,14 +158,7 @@ class DiffusionPipeline(ConfigMixin):
|
||||||
pipeline_module = getattr(pipelines, library_name)
|
pipeline_module = getattr(pipelines, library_name)
|
||||||
class_obj = getattr(pipeline_module, class_name)
|
class_obj = getattr(pipeline_module, class_name)
|
||||||
importable_classes = ALL_IMPORTABLE_CLASSES
|
importable_classes = ALL_IMPORTABLE_CLASSES
|
||||||
class_candidates = {c: class_obj for c in ALL_IMPORTABLE_CLASSES.keys()}
|
class_candidates = {c: class_obj for c in importable_classes.keys()}
|
||||||
elif library_name == module_candidate_name:
|
|
||||||
# if the model is not in diffusers or transformers, we need to load it from the hub
|
|
||||||
# assumes that it's a subclass of ModelMixin
|
|
||||||
class_obj = get_class_from_dynamic_module(cached_folder, module_candidate, class_name, cached_folder)
|
|
||||||
# since it's not from a library, we need to check class candidates for all importable classes
|
|
||||||
importable_classes = ALL_IMPORTABLE_CLASSES
|
|
||||||
class_candidates = {c: class_obj for c in ALL_IMPORTABLE_CLASSES.keys()}
|
|
||||||
else:
|
else:
|
||||||
# else we just import it from the library.
|
# else we just import it from the library.
|
||||||
library = importlib.import_module(library_name)
|
library = importlib.import_module(library_name)
|
||||||
|
|
|
@ -15,5 +15,5 @@ TODO(Patrick, Anton, Suraj)
|
||||||
- PNDM for unconditional image generation in [pipeline_pndm](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py).
|
- PNDM for unconditional image generation in [pipeline_pndm](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_pndm.py).
|
||||||
- Latent diffusion for text to image generation / conditional image generation in [pipeline_latent_diffusion](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_latent_diffusion.py).
|
- Latent diffusion for text to image generation / conditional image generation in [pipeline_latent_diffusion](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_latent_diffusion.py).
|
||||||
- Glide for text to image generation / conditional image generation in [pipeline_glide](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_glide.py).
|
- Glide for text to image generation / conditional image generation in [pipeline_glide](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_glide.py).
|
||||||
- BDDM for spectrogram-to-sound vocoding in [pipeline_bddm](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_bddm.py).
|
- BDDMPipeline for spectrogram-to-sound vocoding in [pipeline_bddm](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_bddm.py).
|
||||||
- Grad-TTS for text to audio generation / conditional audio generation in [pipeline_grad_tts](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_grad_tts.py).
|
- Grad-TTS for text to audio generation / conditional audio generation in [pipeline_grad_tts](https://github.com/huggingface/diffusers/blob/main/src/diffusers/pipelines/pipeline_grad_tts.py).
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
from ..utils import is_inflect_available, is_transformers_available, is_unidecode_available
|
from ..utils import is_inflect_available, is_transformers_available, is_unidecode_available
|
||||||
from .pipeline_bddm import BDDM
|
from .pipeline_bddm import BDDMPipeline
|
||||||
from .pipeline_ddim import DDIM
|
from .pipeline_ddim import DDIMPipeline
|
||||||
from .pipeline_ddpm import DDPM
|
from .pipeline_ddpm import DDPMPipeline
|
||||||
from .pipeline_pndm import PNDM
|
from .pipeline_pndm import PNDMPipeline
|
||||||
|
|
||||||
|
|
||||||
if is_transformers_available():
|
if is_transformers_available():
|
||||||
from .pipeline_glide import Glide
|
from .pipeline_glide import GlidePipeline
|
||||||
from .pipeline_latent_diffusion import LatentDiffusion
|
from .pipeline_latent_diffusion import LatentDiffusionPipeline
|
||||||
|
|
||||||
|
|
||||||
if is_transformers_available() and is_unidecode_available() and is_inflect_available():
|
if is_transformers_available() and is_unidecode_available() and is_inflect_available():
|
||||||
from .pipeline_grad_tts import GradTTS
|
from .pipeline_grad_tts import GradTTSPipeline
|
||||||
|
|
|
@ -271,7 +271,7 @@ class DiffWave(ModelMixin, ConfigMixin):
|
||||||
return self.final_conv(x)
|
return self.final_conv(x)
|
||||||
|
|
||||||
|
|
||||||
class BDDM(DiffusionPipeline):
|
class BDDMPipeline(DiffusionPipeline):
|
||||||
def __init__(self, diffwave, noise_scheduler):
|
def __init__(self, diffwave, noise_scheduler):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
noise_scheduler = noise_scheduler.set_format("pt")
|
noise_scheduler = noise_scheduler.set_format("pt")
|
||||||
|
|
|
@ -21,7 +21,7 @@ import tqdm
|
||||||
from ..pipeline_utils import DiffusionPipeline
|
from ..pipeline_utils import DiffusionPipeline
|
||||||
|
|
||||||
|
|
||||||
class DDIM(DiffusionPipeline):
|
class DDIMPipeline(DiffusionPipeline):
|
||||||
def __init__(self, unet, noise_scheduler):
|
def __init__(self, unet, noise_scheduler):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
noise_scheduler = noise_scheduler.set_format("pt")
|
noise_scheduler = noise_scheduler.set_format("pt")
|
||||||
|
|
|
@ -21,7 +21,7 @@ import tqdm
|
||||||
from ..pipeline_utils import DiffusionPipeline
|
from ..pipeline_utils import DiffusionPipeline
|
||||||
|
|
||||||
|
|
||||||
class DDPM(DiffusionPipeline):
|
class DDPMPipeline(DiffusionPipeline):
|
||||||
def __init__(self, unet, noise_scheduler):
|
def __init__(self, unet, noise_scheduler):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
noise_scheduler = noise_scheduler.set_format("pt")
|
noise_scheduler = noise_scheduler.set_format("pt")
|
||||||
|
|
|
@ -711,7 +711,7 @@ def _extract_into_tensor(arr, timesteps, broadcast_shape):
|
||||||
return res + torch.zeros(broadcast_shape, device=timesteps.device)
|
return res + torch.zeros(broadcast_shape, device=timesteps.device)
|
||||||
|
|
||||||
|
|
||||||
class Glide(DiffusionPipeline):
|
class GlidePipeline(DiffusionPipeline):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
text_unet: GlideTextToImageUNetModel,
|
text_unet: GlideTextToImageUNetModel,
|
||||||
|
|
|
@ -420,7 +420,7 @@ class TextEncoder(ModelMixin, ConfigMixin):
|
||||||
return mu, logw, x_mask
|
return mu, logw, x_mask
|
||||||
|
|
||||||
|
|
||||||
class GradTTS(DiffusionPipeline):
|
class GradTTSPipeline(DiffusionPipeline):
|
||||||
def __init__(self, unet, text_encoder, noise_scheduler, tokenizer):
|
def __init__(self, unet, text_encoder, noise_scheduler, tokenizer):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
noise_scheduler = noise_scheduler.set_format("pt")
|
noise_scheduler = noise_scheduler.set_format("pt")
|
||||||
|
@ -430,7 +430,14 @@ class GradTTS(DiffusionPipeline):
|
||||||
|
|
||||||
@torch.no_grad()
|
@torch.no_grad()
|
||||||
def __call__(
|
def __call__(
|
||||||
self, text, num_inference_steps=50, temperature=1.3, length_scale=0.91, speaker_id=15, torch_device=None
|
self,
|
||||||
|
text,
|
||||||
|
num_inference_steps=50,
|
||||||
|
temperature=1.3,
|
||||||
|
length_scale=0.91,
|
||||||
|
speaker_id=15,
|
||||||
|
torch_device=None,
|
||||||
|
generator=None,
|
||||||
):
|
):
|
||||||
if torch_device is None:
|
if torch_device is None:
|
||||||
torch_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
torch_device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
||||||
|
@ -464,7 +471,7 @@ class GradTTS(DiffusionPipeline):
|
||||||
mu_y = mu_y.transpose(1, 2)
|
mu_y = mu_y.transpose(1, 2)
|
||||||
|
|
||||||
# Sample latent representation from terminal distribution N(mu_y, I)
|
# Sample latent representation from terminal distribution N(mu_y, I)
|
||||||
z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
|
z = mu_y + torch.randn(mu_y.shape, device=mu_y.device, generator=generator) / temperature
|
||||||
|
|
||||||
xt = z * y_mask
|
xt = z * y_mask
|
||||||
h = 1.0 / num_inference_steps
|
h = 1.0 / num_inference_steps
|
||||||
|
|
|
@ -860,7 +860,7 @@ class AutoencoderKL(ModelMixin, ConfigMixin):
|
||||||
return dec, posterior
|
return dec, posterior
|
||||||
|
|
||||||
|
|
||||||
class LatentDiffusion(DiffusionPipeline):
|
class LatentDiffusionPipeline(DiffusionPipeline):
|
||||||
def __init__(self, vqvae, bert, tokenizer, unet, noise_scheduler):
|
def __init__(self, vqvae, bert, tokenizer, unet, noise_scheduler):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
noise_scheduler = noise_scheduler.set_format("pt")
|
noise_scheduler = noise_scheduler.set_format("pt")
|
||||||
|
|
|
@ -21,7 +21,7 @@ import tqdm
|
||||||
from ..pipeline_utils import DiffusionPipeline
|
from ..pipeline_utils import DiffusionPipeline
|
||||||
|
|
||||||
|
|
||||||
class PNDM(DiffusionPipeline):
|
class PNDMPipeline(DiffusionPipeline):
|
||||||
def __init__(self, unet, noise_scheduler):
|
def __init__(self, unet, noise_scheduler):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
noise_scheduler = noise_scheduler.set_format("pt")
|
noise_scheduler = noise_scheduler.set_format("pt")
|
||||||
|
|
|
@ -22,17 +22,17 @@ import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from diffusers import (
|
from diffusers import (
|
||||||
BDDM,
|
BDDMPipeline,
|
||||||
DDIM,
|
DDIMPipeline,
|
||||||
DDPM,
|
|
||||||
Glide,
|
|
||||||
PNDM,
|
|
||||||
DDIMScheduler,
|
DDIMScheduler,
|
||||||
|
DDPMPipeline,
|
||||||
DDPMScheduler,
|
DDPMScheduler,
|
||||||
|
GlidePipeline,
|
||||||
GlideSuperResUNetModel,
|
GlideSuperResUNetModel,
|
||||||
GlideTextToImageUNetModel,
|
GlideTextToImageUNetModel,
|
||||||
GradTTS,
|
GradTTSPipeline,
|
||||||
LatentDiffusion,
|
LatentDiffusionPipeline,
|
||||||
|
PNDMPipeline,
|
||||||
PNDMScheduler,
|
PNDMScheduler,
|
||||||
UNetGradTTSModel,
|
UNetGradTTSModel,
|
||||||
UNetLDMModel,
|
UNetLDMModel,
|
||||||
|
@ -583,11 +583,11 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
model = UNetModel(ch=32, ch_mult=(1, 2), num_res_blocks=2, attn_resolutions=(16,), resolution=32)
|
model = UNetModel(ch=32, ch_mult=(1, 2), num_res_blocks=2, attn_resolutions=(16,), resolution=32)
|
||||||
schedular = DDPMScheduler(timesteps=10)
|
schedular = DDPMScheduler(timesteps=10)
|
||||||
|
|
||||||
ddpm = DDPM(model, schedular)
|
ddpm = DDPMPipeline(model, schedular)
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
ddpm.save_pretrained(tmpdirname)
|
ddpm.save_pretrained(tmpdirname)
|
||||||
new_ddpm = DDPM.from_pretrained(tmpdirname)
|
new_ddpm = DDPMPipeline.from_pretrained(tmpdirname)
|
||||||
|
|
||||||
generator = torch.manual_seed(0)
|
generator = torch.manual_seed(0)
|
||||||
|
|
||||||
|
@ -601,7 +601,7 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
def test_from_pretrained_hub(self):
|
def test_from_pretrained_hub(self):
|
||||||
model_path = "fusing/ddpm-cifar10"
|
model_path = "fusing/ddpm-cifar10"
|
||||||
|
|
||||||
ddpm = DDPM.from_pretrained(model_path)
|
ddpm = DDPMPipeline.from_pretrained(model_path)
|
||||||
ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path)
|
ddpm_from_hub = DiffusionPipeline.from_pretrained(model_path)
|
||||||
|
|
||||||
ddpm.noise_scheduler.num_timesteps = 10
|
ddpm.noise_scheduler.num_timesteps = 10
|
||||||
|
@ -624,7 +624,7 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
noise_scheduler = DDPMScheduler.from_config(model_id)
|
noise_scheduler = DDPMScheduler.from_config(model_id)
|
||||||
noise_scheduler = noise_scheduler.set_format("pt")
|
noise_scheduler = noise_scheduler.set_format("pt")
|
||||||
|
|
||||||
ddpm = DDPM(unet=unet, noise_scheduler=noise_scheduler)
|
ddpm = DDPMPipeline(unet=unet, noise_scheduler=noise_scheduler)
|
||||||
image = ddpm(generator=generator)
|
image = ddpm(generator=generator)
|
||||||
|
|
||||||
image_slice = image[0, -1, -3:, -3:].cpu()
|
image_slice = image[0, -1, -3:, -3:].cpu()
|
||||||
|
@ -641,7 +641,7 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
unet = UNetModel.from_pretrained(model_id)
|
unet = UNetModel.from_pretrained(model_id)
|
||||||
noise_scheduler = DDIMScheduler(tensor_format="pt")
|
noise_scheduler = DDIMScheduler(tensor_format="pt")
|
||||||
|
|
||||||
ddim = DDIM(unet=unet, noise_scheduler=noise_scheduler)
|
ddim = DDIMPipeline(unet=unet, noise_scheduler=noise_scheduler)
|
||||||
image = ddim(generator=generator, eta=0.0)
|
image = ddim(generator=generator, eta=0.0)
|
||||||
|
|
||||||
image_slice = image[0, -1, -3:, -3:].cpu()
|
image_slice = image[0, -1, -3:, -3:].cpu()
|
||||||
|
@ -660,7 +660,7 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
unet = UNetModel.from_pretrained(model_id)
|
unet = UNetModel.from_pretrained(model_id)
|
||||||
noise_scheduler = PNDMScheduler(tensor_format="pt")
|
noise_scheduler = PNDMScheduler(tensor_format="pt")
|
||||||
|
|
||||||
pndm = PNDM(unet=unet, noise_scheduler=noise_scheduler)
|
pndm = PNDMPipeline(unet=unet, noise_scheduler=noise_scheduler)
|
||||||
image = pndm(generator=generator)
|
image = pndm(generator=generator)
|
||||||
|
|
||||||
image_slice = image[0, -1, -3:, -3:].cpu()
|
image_slice = image[0, -1, -3:, -3:].cpu()
|
||||||
|
@ -674,7 +674,7 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
@slow
|
@slow
|
||||||
def test_ldm_text2img(self):
|
def test_ldm_text2img(self):
|
||||||
model_id = "fusing/latent-diffusion-text2im-large"
|
model_id = "fusing/latent-diffusion-text2im-large"
|
||||||
ldm = LatentDiffusion.from_pretrained(model_id)
|
ldm = LatentDiffusionPipeline.from_pretrained(model_id)
|
||||||
|
|
||||||
prompt = "A painting of a squirrel eating a burger"
|
prompt = "A painting of a squirrel eating a burger"
|
||||||
generator = torch.manual_seed(0)
|
generator = torch.manual_seed(0)
|
||||||
|
@ -689,7 +689,7 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
@slow
|
@slow
|
||||||
def test_glide_text2img(self):
|
def test_glide_text2img(self):
|
||||||
model_id = "fusing/glide-base"
|
model_id = "fusing/glide-base"
|
||||||
glide = Glide.from_pretrained(model_id)
|
glide = GlidePipeline.from_pretrained(model_id)
|
||||||
|
|
||||||
prompt = "a pencil sketch of a corgi"
|
prompt = "a pencil sketch of a corgi"
|
||||||
generator = torch.manual_seed(0)
|
generator = torch.manual_seed(0)
|
||||||
|
@ -704,22 +704,25 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
@slow
|
@slow
|
||||||
def test_grad_tts(self):
|
def test_grad_tts(self):
|
||||||
model_id = "fusing/grad-tts-libri-tts"
|
model_id = "fusing/grad-tts-libri-tts"
|
||||||
grad_tts = GradTTS.from_pretrained(model_id)
|
grad_tts = GradTTSPipeline.from_pretrained(model_id)
|
||||||
|
|
||||||
text = "Hello world, I missed you so much."
|
text = "Hello world, I missed you so much."
|
||||||
|
generator = torch.manual_seed(0)
|
||||||
|
|
||||||
# generate mel spectrograms using text
|
# generate mel spectrograms using text
|
||||||
mel_spec = grad_tts(text)
|
mel_spec = grad_tts(text, generator=generator)
|
||||||
|
|
||||||
assert mel_spec.shape == (1, 256, 256, 3)
|
assert mel_spec.shape == (1, 80, 143)
|
||||||
expected_slice = torch.tensor([0.7119, 0.7073, 0.6460, 0.7780, 0.7423, 0.6926, 0.7378, 0.7189, 0.7784])
|
expected_slice = torch.tensor(
|
||||||
assert (mel_spec.flatten() - expected_slice).abs().max() < 1e-2
|
[-6.6119, -6.5963, -6.2776, -6.7496, -6.7096, -6.5131, -6.4643, -6.4817, -6.7185]
|
||||||
|
)
|
||||||
|
assert (mel_spec[0, :3, :3].flatten() - expected_slice).abs().max() < 1e-2
|
||||||
|
|
||||||
def test_module_from_pipeline(self):
|
def test_module_from_pipeline(self):
|
||||||
model = DiffWave(num_res_layers=4)
|
model = DiffWave(num_res_layers=4)
|
||||||
noise_scheduler = DDPMScheduler(timesteps=12)
|
noise_scheduler = DDPMScheduler(timesteps=12)
|
||||||
|
|
||||||
bddm = BDDM(model, noise_scheduler)
|
bddm = BDDMPipeline(model, noise_scheduler)
|
||||||
|
|
||||||
# check if the library name for the diffwave module is set to pipeline module
|
# check if the library name for the diffwave module is set to pipeline module
|
||||||
self.assertTrue(bddm.config["diffwave"][0] == "pipeline_bddm")
|
self.assertTrue(bddm.config["diffwave"][0] == "pipeline_bddm")
|
||||||
|
@ -727,6 +730,6 @@ class PipelineTesterMixin(unittest.TestCase):
|
||||||
# check if we can save and load the pipeline
|
# check if we can save and load the pipeline
|
||||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
bddm.save_pretrained(tmpdirname)
|
bddm.save_pretrained(tmpdirname)
|
||||||
_ = BDDM.from_pretrained(tmpdirname)
|
_ = BDDMPipeline.from_pretrained(tmpdirname)
|
||||||
# check if the same works using the DiffusionPipeline class
|
# check if the same works using the DiffusionPipeline class
|
||||||
_ = DiffusionPipeline.from_pretrained(tmpdirname)
|
_ = DiffusionPipeline.from_pretrained(tmpdirname)
|
||||||
|
|
Loading…
Reference in New Issue