import json
import logging
import os.path
from dataclasses import dataclass
import random
from typing import Generator, Callable, Any

import torch
from PIL import Image, ImageDraw, ImageFont
from colorama import Fore, Style
from diffusers import StableDiffusionPipeline, DDIMScheduler, DPMSolverMultistepScheduler, DDPMScheduler, PNDMScheduler, EulerDiscreteScheduler, EulerAncestralDiscreteScheduler, LMSDiscreteScheduler, KDPM2AncestralDiscreteScheduler
from torch import FloatTensor
from torch.cuda.amp import autocast
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
from tqdm.auto import tqdm
from compel import Compel


def clean_filename(filename):
    """
    removes every character that is not a letter, a digit or a space, then strips
    trailing whitespace, so the result is safe to use as a filename
    """
    return "".join(c for c in filename if c.isalpha() or c.isdigit() or c == ' ').rstrip()


@dataclass
class SampleRequest:
    """
    describes one sample image to generate
    """
    prompt: str
    negative_prompt: str
    # -1 means "draw a fresh random seed every time samples are generated"
    seed: int
    # (width, height) in pixels
    size: tuple[int, int]
    # when True, the prompt is overwritten by SampleGenerator.update_random_captions
    wants_random_caption: bool = False

    def __str__(self):
        rep = self.prompt
        # truthiness (rather than len()) also tolerates a None negative prompt
        if self.negative_prompt:
            rep += f"\n negative prompt: {self.negative_prompt}"
        rep += f"\n seed: {self.seed}"
        return rep


def chunk_list(l: list, batch_size: int,
               compatibility_test: Callable[[Any, Any], bool] = lambda x, y: True
               ) -> Generator[list, None, None]:
    """
    groups the items of `l` into buckets of mutually compatible items, then yields
    each bucket in slices of at most `batch_size` items

    an item joins the first existing bucket whose first element passes
    `compatibility_test(item, first_element)`; otherwise it starts a new bucket.
    the default test accepts everything, so all items land in a single bucket.
    """
    buckets = []
    for item in l:
        compatible_bucket = next((b for b in buckets if compatibility_test(item, b[0])), None)
        if compatible_bucket is None:
            buckets.append([item])
        else:
            compatible_bucket.append(item)

    for bucket in buckets:
        for start in range(0, len(bucket), batch_size):
            yield bucket[start:start + batch_size]


def get_best_size_for_aspect_ratio(aspect_ratio, default_resolution) -> tuple[int, int]:
    """
    returns the (width, height) whose aspect ratio is closest to `aspect_ratio`,
    chosen among sizes that are multiples of 64 in [256, 1024) and whose pixel
    count is within 128*64 of default_resolution squared

    raises ValueError if no size satisfies the pixel-count constraint
    """
    target_pixel_count = default_resolution * default_resolution
    sizes = [(w, h)
             for w in range(256, 1024, 64)
             for h in range(256, 1024, 64)
             if abs((w * h) - target_pixel_count) <= 128 * 64]
    if not sizes:
        # min() on an empty list would raise the same exception type, but with an opaque message
        raise ValueError(f"no candidate sample size matches resolution {default_resolution}")
    return min(sizes, key=lambda s: abs(1 - (aspect_ratio / (s[0] / s[1]))))


class SampleGenerator:
    """
    generates sample images from a list of SampleRequests at several cfg scales,
    saves them under `<log_folder>/samples/` and logs them to a SummaryWriter.
    the requests and generation settings are read from a .txt or .json config file.
    """
    seed: int
    default_resolution: int
    cfgs: list[float] = [7, 4, 1.01]
    scheduler: str = 'ddim'
    num_inference_steps: int = 30
    random_captions = False

    sample_requests: list[SampleRequest]
    log_folder: str
    log_writer: SummaryWriter

    def __init__(self,
                 log_folder: str,
                 log_writer: SummaryWriter,
                 default_resolution: int,
                 config_file_path: str,
                 batch_size: int,
                 default_seed: int,
                 default_sample_steps: int,
                 use_xformers: bool,
                 use_penultimate_clip_layer: bool):
        """
        stores the settings, loads the sample config from `config_file_path` and makes
        sure `<log_folder>/samples/` exists
        """
        self.log_folder = log_folder
        self.log_writer = log_writer
        self.batch_size = batch_size
        self.config_file_path = config_file_path
        self.use_xformers = use_xformers
        self.show_progress_bars = False
        self.generate_pretrain_samples = False
        self.use_penultimate_clip_layer = use_penultimate_clip_layer

        self.default_resolution = default_resolution
        self.default_seed = default_seed
        self.sample_steps = default_sample_steps

        # None marks "never loaded"; reload_config() replaces it even when loading fails
        self.sample_requests = None
        self.reload_config()
        print(f" * SampleGenerator initialized with {len(self.sample_requests)} prompts, generating samples every {self.sample_steps} training steps, using scheduler '{self.scheduler}' with {self.num_inference_steps} inference steps")
        # exist_ok avoids the race between an existence check and the creation
        os.makedirs(f"{log_folder}/samples/", exist_ok=True)

    def reload_config(self):
        """
        re-reads the sample config file (.txt or .json). any error is logged as a
        warning instead of propagating; if no requests were ever loaded, random-caption
        requests are used as a stand-in.
        """
        try:
            config_file_extension = os.path.splitext(self.config_file_path)[1].lower()
            if config_file_extension == '.txt':
                self._reload_sample_prompts_txt(self.config_file_path)
            elif config_file_extension == '.json':
                self._reload_config_json(self.config_file_path)
            else:
                raise ValueError(f"Unrecognized file type '{config_file_extension}' for sample config, must be .txt or .json")
        except Exception as e:
            # deliberate best-effort: a broken sample config must not abort the caller
            logging.warning(
                f" * {Fore.LIGHTYELLOW_EX}Error trying to read sample config from {self.config_file_path}: {Style.RESET_ALL}{e}")
            logging.warning(
                f" Edit {self.config_file_path} to fix the problem. It will be automatically reloaded next time samples are due to be generated."
            )
            if self.sample_requests is None:
                logging.warning(
                    " Will generate samples from random training image captions until the problem is fixed.")
                self.sample_requests = self._make_random_caption_sample_requests()

    def update_random_captions(self, possible_captions: list[str]):
        """
        assigns captions from `possible_captions` (cycling if there are fewer captions
        than requests) to every request that wants a random caption. does nothing when
        `possible_captions` is empty.
        """
        if not possible_captions:
            # nothing to choose from; also avoids a modulo-by-zero below
            return
        random_prompt_sample_requests = [r for r in self.sample_requests if r.wants_random_caption]
        for i, request in enumerate(random_prompt_sample_requests):
            request.prompt = possible_captions[i % len(possible_captions)]

    def _reload_sample_prompts_txt(self, path):
        """
        reads one prompt per line from `path`; an empty file falls back to
        random-caption requests
        """
        with open(path, 'rt') as f:
            self.sample_requests = [SampleRequest(prompt=line.strip(),
                                                  negative_prompt='',
                                                  seed=self.default_seed,
                                                  size=(self.default_resolution, self.default_resolution))
                                    for line in f]
        if not self.sample_requests:
            self.sample_requests = self._make_random_caption_sample_requests()

    def _make_random_caption_sample_requests(self):
        """
        returns min(4, batch_size) empty-prompt requests flagged as wanting a random caption
        """
        num_random_captions = min(4, self.batch_size)
        return [SampleRequest(prompt='',
                              negative_prompt='',
                              seed=self.default_seed,
                              size=(self.default_resolution, self.default_resolution),
                              wants_random_caption=True)
                for _ in range(num_random_captions)]

    def _reload_config_json(self, path):
        """
        reads generation settings and sample requests from the JSON file at `path`.
        settings whose key is missing keep their current value; a missing or empty
        'samples' list falls back to random-caption requests.
        """
        with open(path, 'rt') as f:
            config = json.load(f)

        # if keys are missing, keep current values
        self.default_resolution = config.get('resolution', self.default_resolution)
        self.cfgs = config.get('cfgs', self.cfgs)
        self.batch_size = config.get('batch_size', self.batch_size)
        self.scheduler = config.get('scheduler', self.scheduler)
        self.num_inference_steps = config.get('num_inference_steps', self.num_inference_steps)
        self.show_progress_bars = config.get('show_progress_bars', self.show_progress_bars)
        self.generate_pretrain_samples = config.get('generate_pretrain_samples', self.generate_pretrain_samples)
        self.sample_steps = config.get('generate_samples_every_n_steps', self.sample_steps)

        sample_requests_config = config.get('samples', None)
        if sample_requests_config is None:
            self.sample_requests = self._make_random_caption_sample_requests()
        else:
            default_seed = config.get('seed', self.default_seed)
            self.sample_requests = [
                SampleRequest(prompt=p.get('prompt', ''),
                              negative_prompt=p.get('negative_prompt', ''),
                              seed=p.get('seed', default_seed),
                              # an explicit 'size' wins; otherwise derive one from 'aspect_ratio'
                              size=tuple(p.get('size', None)
                                         or get_best_size_for_aspect_ratio(p.get('aspect_ratio', 1),
                                                                           self.default_resolution)),
                              wants_random_caption=p.get('random_caption', False))
                for p in sample_requests_config]
        if not self.sample_requests:
            # an empty 'samples' list: the fallback has to be stored, not just computed
            self.sample_requests = self._make_random_caption_sample_requests()

    @staticmethod
    def _load_label_font():
        """ returns the font used for the cfg label: arial 20pt, or PIL's default font if that cannot be loaded """
        try:
            return ImageFont.truetype(font="arial.ttf", size=20)
        except (OSError, ImportError):
            # font file not found/readable, or PIL built without FreeType support
            return ImageFont.load_default()

    @staticmethod
    def _stamp_cfg_label(image, cfg, font):
        """ draws a black-on-white "cfg:<value>" label in the bottom-right corner of `image`, in place """
        draw = ImageDraw.Draw(image)
        print_msg = f"cfg:{cfg:.1f}"
        left, top, right, bottom = draw.textbbox(xy=(0, 0), text=print_msg, font=font)
        text_width = right - left
        text_height = bottom - top
        x = float(image.width - text_width - 10)
        y = float(image.height - text_height - 10)
        draw.rectangle((x, y, image.width, image.height), fill="white")
        draw.text((x, y), print_msg, fill="black", font=font)

    def _save_and_log_sample(self, request: SampleRequest, tiles: list, size: tuple[int, int],
                             global_step: int, sample_index: int):
        """ pastes `tiles` side by side, saves the strip as .jpg plus a .txt description, and logs it to the writer """
        result = Image.new('RGB', (size[0] * len(tiles), size[1]))
        x_offset = 0
        for tile in tiles:
            result.paste(tile, (x_offset, 0))
            x_offset += tile.width

        clean_prompt = clean_filename(request.prompt)
        file_stem = f"{self.log_folder}/samples/gs{global_step:05}-{sample_index}-{clean_prompt[:100]}"
        result.save(f"{file_stem}.jpg", format="JPEG", quality=95, optimize=True, progressive=False)
        with open(f"{file_stem}.txt", "w", encoding='utf-8') as f:
            f.write(str(request))

        tfimage = transforms.ToTensor()(result)
        if request.wants_random_caption:
            # the caption can change between calls, so keep it out of the tag
            tag = f"sample_{sample_index}"
        else:
            tag = f"sample_{sample_index}_{clean_prompt[:100]}"
        self.log_writer.add_image(tag=tag, img_tensor=tfimage, global_step=global_step)

    @torch.no_grad()
    def generate_samples(self, pipe: StableDiffusionPipeline, global_step: int):
        """
        generates samples at different cfg scales and saves them to disk

        for every request one image per entry of self.cfgs is generated; the images of
        a request are pasted side by side, saved under `<log_folder>/samples/` together
        with a .txt description, and logged to the SummaryWriter.
        """
        disable_progress_bars = not self.show_progress_bars
        font = self._load_label_font()

        if not self.show_progress_bars:
            print(f" * Generating samples at gs:{global_step} for {len(self.sample_requests)} prompts")

        def sample_compatibility_test(a: SampleRequest, b: SampleRequest) -> bool:
            # the pipeline is called with a single width/height per batch
            return a.size == b.size

        sample_index = 0
        with autocast():
            batches = list(chunk_list(self.sample_requests, self.batch_size,
                                      compatibility_test=sample_compatibility_test))
            compel = Compel(tokenizer=pipe.tokenizer,
                            text_encoder=pipe.text_encoder,
                            use_penultimate_clip_layer=self.use_penultimate_clip_layer)
            # the context manager closes the progress bar even if generation fails
            with tqdm(total=len(batches), disable=disable_progress_bars, position=1, leave=False,
                      desc=f"{Fore.YELLOW}Image samples (batches of {self.batch_size}){Style.RESET_ALL}") as pbar:
                for batch in batches:
                    prompts = [p.prompt for p in batch]
                    negative_prompts = [p.negative_prompt for p in batch]
                    seeds = [(p.seed if p.seed != -1 else random.randint(0, 2 ** 30)) for p in batch]
                    # all sizes in a batch are the same
                    size = batch[0].size
                    # NOTE: the same generator objects are passed to the pipeline for every cfg scale
                    generators = [torch.Generator(pipe.device).manual_seed(seed) for seed in seeds]

                    # the embeddings do not depend on the cfg scale, so compute them once per batch
                    prompt_embeds = compel(prompts)
                    negative_prompt_embeds = compel(negative_prompts)

                    # batch_images[cfg_idx][prompt_idx]
                    batch_images = []
                    for cfg in self.cfgs:
                        pipe.set_progress_bar_config(disable=disable_progress_bars, position=2, leave=False,
                                                     desc=f"{Fore.LIGHTYELLOW_EX}CFG scale {cfg}{Style.RESET_ALL}")
                        images = pipe(prompt_embeds=prompt_embeds,
                                      negative_prompt_embeds=negative_prompt_embeds,
                                      num_inference_steps=self.num_inference_steps,
                                      num_images_per_prompt=1,
                                      guidance_scale=cfg,
                                      generator=generators,
                                      width=size[0],
                                      height=size[1],
                                      ).images
                        for image in images:
                            self._stamp_cfg_label(image, cfg, font)
                        batch_images.append(images)
                        del images

                    for prompt_idx, request in enumerate(batch):
                        tiles = [images_for_cfg[prompt_idx] for images_for_cfg in batch_images]
                        self._save_and_log_sample(request, tiles, size, global_step, sample_index)
                        sample_index += 1
                        del tiles

                    # drop references before the next batch allocates
                    del generators, prompt_embeds, negative_prompt_embeds, batch_images

                    pbar.update(1)

    @torch.no_grad()
    def create_inference_pipe(self, unet, text_encoder, tokenizer, vae, diffusers_scheduler_config: dict):
        """
        creates a pipeline for SD inference from the given components, using the
        scheduler selected by self.scheduler
        """
        scheduler = self._create_scheduler(diffusers_scheduler_config)
        pipe = StableDiffusionPipeline(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            unet=unet,
            scheduler=scheduler,
            safety_checker=None,  # save vram
            requires_safety_checker=None,  # avoid nag
            feature_extractor=None,  # must be None if no safety checker
        )
        if self.use_xformers:
            pipe.enable_xformers_memory_efficient_attention()
        return pipe

    @torch.no_grad()
    def _create_scheduler(self, scheduler_config: dict):
        """
        builds the diffusers scheduler named by self.scheduler from `scheduler_config`;
        an unsupported name prints a notice and falls back to ddim
        """
        # name -> (scheduler class, extra keyword arguments for from_config)
        scheduler_factories = {
            'ddim': (DDIMScheduler, {}),
            'dpm++': (DPMSolverMultistepScheduler, {'algorithm_type': "dpmsolver++"}),
            'pndm': (PNDMScheduler, {}),
            'ddpm': (DDPMScheduler, {}),
            'lms': (LMSDiscreteScheduler, {}),
            'euler': (EulerDiscreteScheduler, {}),
            'euler_a': (EulerAncestralDiscreteScheduler, {}),
            'kdpm2': (KDPM2AncestralDiscreteScheduler, {}),
        }
        # the isinstance guard keeps unhashable config values on the fallback path
        entry = scheduler_factories.get(self.scheduler) if isinstance(self.scheduler, str) else None
        if entry is None:
            print(f"unsupported scheduler '{self.scheduler}', falling back to ddim")
            entry = scheduler_factories['ddim']
        scheduler_class, extra_kwargs = entry
        return scheduler_class.from_config(scheduler_config, **extra_kwargs)