unload old images for better sys ram use, fix up crop jitter

This commit is contained in:
Victor Hall 2022-11-16 13:52:06 -05:00
parent d26fabac14
commit b47c5c0ed3
17 changed files with 485 additions and 109 deletions

View File

@ -65,7 +65,7 @@ model:
data: data:
target: main.DataModuleFromConfig target: main.DataModuleFromConfig
params: params:
batch_size: 6 # prefer highest possible without getting CUDA Out of Memory error batch_size: 6 # prefer highest possible without getting CUDA Out of Memory error, A100 40GB =~20 80GB= ~48
num_workers: 6 num_workers: 6
wrap: falsegit wrap: falsegit
train: train:
@ -73,9 +73,10 @@ data:
params: params:
repeats: 5 # rough suggestions: 5 with 5000+ images, 15 for 1000 images, use micro yaml for <100 repeats: 5 # rough suggestions: 5 with 5000+ images, 15 for 1000 images, use micro yaml for <100
debug_level: 1 # 1 to print if images are dropped due to multiple-aspect ratio image batching debug_level: 1 # 1 to print if images are dropped due to multiple-aspect ratio image batching
conditional_dropout: 0.01 # experimental, likelihood to drop the caption, may help with poorly captioned images conditional_dropout: 0.08 # experimental, likelihood to drop the caption, may help with poorly captioned images
crop_jitter: 5 # adds N pixels of jitter to cropping algorithm for non-square images only crop_jitter: 5 # adds N pixels of jitter to cropping algorithm for non-square images only
big_mode: 0 # set to 1 or 2 to use larger image sizes for training, USES LOTS OF VRAM! Requires 40GB+ resolution: 512 # 512, 576, or 640, increases VRAM substantially
seed: 555 # seed used to shuffle the dataset ordering, keep constant for reproducibility
validation: validation:
target: ldm.data.ed_validate.EDValidateBatch target: ldm.data.ed_validate.EDValidateBatch
params: params:

View File

@ -0,0 +1,109 @@
model:
base_learning_rate: 1.2e-6
target: ldm.models.diffusion.ddpm.LatentDiffusion
params:
linear_start: 0.00085
linear_end: 0.0120
num_timesteps_cond: 1
log_every_t: 300
timesteps: 1000
first_stage_key: image
cond_stage_key: caption
image_size: 64
channels: 4
cond_stage_trainable: true
conditioning_key: crossattn
monitor: val/loss_simple_ema
scale_factor: 0.18215
use_ema: False
unfreeze_model: True
model_lr: 1.2e-6
unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel
params:
image_size: 32 # unused
in_channels: 4
out_channels: 4
model_channels: 320
attention_resolutions: [ 4, 2, 1 ]
num_res_blocks: 2
channel_mult: [ 1, 2, 4, 4 ]
num_heads: 8
use_spatial_transformer: True
transformer_depth: 1
context_dim: 768
use_checkpoint: True
legacy: False
first_stage_config:
target: ldm.models.autoencoder.AutoencoderKL
params:
embed_dim: 4
monitor: val/rec_loss
ddconfig:
double_z: true
z_channels: 4
resolution: 512
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
lossconfig:
target: torch.nn.Identity
cond_stage_config:
target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
data:
target: main.DataModuleFromConfig
params:
batch_size: 6 # prefer highest possible without getting CUDA Out of Memory error, A100 40GB =~20 80GB= ~48
num_workers: 6
wrap: false
train:
target: ldm.data.every_dream.EveryDreamBatch
params:
repeats: 1 # rough suggestions: 5 with 5000+ images, 15 for 1000 images, use micro yaml for <100
debug_level: 1 # 1 to print if images are dropped due to multiple-aspect ratio image batching
conditional_dropout: 0.08 # experimental, likelihood to drop the caption, may help with poorly captioned images
crop_jitter: 15 # adds N pixels of jitter to cropping algorithm for non-square images only
resolution: 512 # defines max pixels for all aspects, 512, 576, 640, 704, or 768, larger values use substantially more VRAM
validation:
target: ldm.data.ed_validate.EDValidateBatch
params:
repeats: 0.3
test:
target: ldm.data.ed_validate.EDValidateBatch
params:
repeats: 0.2
lightning:
modelcheckpoint:
params:
every_n_epochs: 1 # produce a ckpt every epoch, leave 1!
#every_n_train_steps: 1400 # can only use epoch or train step checkpoints
save_top_k: 6 # save the best N ckpts according to loss, can reduce to save disk space but suggest at LEAST 2, more if you have max_epochs below higher!
save_last: True
filename: "{epoch:02d}-{step:05d}"
callbacks:
image_logger:
target: main.ImageLogger
params:
batch_frequency: 500
max_images: 16
increase_log_steps: False
trainer:
benchmark: True
max_epochs: 1 # better to run several epochs and test your checkpoints! Try 4-5, you get a checkpoint every epoch to test!
max_steps: 99000 # better to end on epochs not steps, especially with >500 images to ensure even distribution, but you can set this if you really want...
check_val_every_n_epoch: 1
gpus: 0,

View File

@ -1,5 +1,5 @@
model: model:
base_learning_rate: 1.0e-6 base_learning_rate: 1.2e-6
target: ldm.models.diffusion.ddpm.LatentDiffusion target: ldm.models.diffusion.ddpm.LatentDiffusion
params: params:
linear_start: 0.00085 linear_start: 0.00085
@ -17,16 +17,17 @@ model:
scale_factor: 0.18215 scale_factor: 0.18215
use_ema: False use_ema: False
unfreeze_model: True unfreeze_model: True
#model_lr: 1.0e-6 model_lr: 1.1e-6
#use_scheduler: True
scheduler_config: scheduler_config:
target: ldm.lr_scheduler.LambdaLinearScheduler target: ldm.lr_scheduler.EveryDreamScheduler
params: params:
warm_up_steps: [ 5 ] f_start: 5.0e-1 # starting LR multiplier
cycle_lengths: [ 1000 ] # incredibly large number to prevent corner cases warm_up_steps: 50 # number of steps to warm up to f_start before decaying LR
verbosity_interval: 25 # how often to print LR updates f_max: 1.0 # maximum LR multiplier
f_start: [ 1.e-6 ] f_min: 5.0e-1 # minimum LR multiplier
f_max: [ 1.e-6 ] # 1. steps_to_min: 10000 # number of steps to decay from f_max to f_min
f_min: [ 1.e-8 ] # 1. verbosity_interval: 200 # how often to print LR multiplier (steps)
unet_config: unet_config:
target: ldm.modules.diffusionmodules.openaimodel.UNetModel target: ldm.modules.diffusionmodules.openaimodel.UNetModel
@ -80,13 +81,16 @@ data:
train: train:
target: ldm.data.every_dream.EveryDreamBatch target: ldm.data.every_dream.EveryDreamBatch
params: params:
repeats: 5 repeats: 1 # rough suggestions: 5 with 5000+ images, 15 for 1000 images, use micro yaml for <100
flip_p: 0 debug_level: 1 # 1 to print if images are dropped due to multiple-aspect ratio image batching
debug_level: 1 conditional_dropout: 0.08 # experimental, likelihood to drop the caption, may help with poorly captioned images
crop_jitter: 20 # adds N pixels of jitter to cropping algorithm for non-square images only
resolution: 512 # defines max pixels for all aspects, 512, 576, 640, 704, or 768
seed: 555 # seed used to shuffle the dataset, keep constant for reproducibility
validation: validation:
target: ldm.data.ed_validate.EDValidateBatch target: ldm.data.ed_validate.EDValidateBatch
params: params:
repeats: 0.5 repeats: 0.25
test: test:
target: ldm.data.ed_validate.EDValidateBatch target: ldm.data.ed_validate.EDValidateBatch
params: params:
@ -96,21 +100,21 @@ lightning:
modelcheckpoint: modelcheckpoint:
params: params:
every_n_epochs: 1 every_n_epochs: 1
#every_n_train_steps: 1400 # can only use every_n_epochs OR every_n_train_steps, suggest you stick with epochs #every_n_train_steps: 1500 # can only use every_n_epochs OR every_n_train_steps, suggest you stick with epochs
save_last: True save_last: True
save_top_k: 5 save_top_k: 99
filename: "{epoch:02d}-{step:05d}" filename: "{epoch:02d}-{step:05d}"
callbacks: callbacks:
image_logger: image_logger:
target: main.ImageLogger target: main.ImageLogger
params: params:
batch_frequency: 100 batch_frequency: 400
max_images: 16 max_images: 16
increase_log_steps: False increase_log_steps: False
trainer: trainer:
benchmark: True benchmark: True
max_epochs: 4 max_epochs: 5
max_steps: 99000 # better to end on epochs not steps, especially with >500 images to ensure even distribution, but you can set this if you really want... max_steps: 99000 # better to end on epochs not steps, especially with >500 images to ensure even distribution, but you can set this if you really want...
check_val_every_n_epoch: 1 check_val_every_n_epoch: 1
gpus: 0, gpus: 0,

BIN
demo/crop.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

76
ldm/data/aspects.py Normal file
View File

@ -0,0 +1,76 @@
# Aspect-ratio bucket tables, one table per supported training resolution.
#
# Every entry is a [width, height] pair; each non-square size is listed in both
# landscape and portrait orientation. The FIRST entry of each table is the
# square bucket, and get_aspect_buckets() selects a table by comparing that
# entry's side length with the requested resolution.
# Trailing comments: pixel count and width:height ratio of the landscape size.

GOD_ASPECTS = [
    [768, 768],                  # 589824  1:1
    [832, 704], [704, 832],      # 585728  1.182:1
    [896, 640], [640, 896],      # 573440  1.4:1
    [960, 576], [576, 960],      # 552960  1.667:1
    [1024, 576], [576, 1024],    # 589824  1.778:1
    [1088, 512], [512, 1088],    # 557056  2.125:1
    [1152, 512], [512, 1152],    # 589824  2.25:1
    [1216, 448], [448, 1216],    # 544768  2.714:1
    [1280, 448], [448, 1280],    # 573440  2.857:1
    [1344, 384], [384, 1344],    # 516096  3.5:1
    [1408, 384], [384, 1408],    # 540672  3.667:1
    [1472, 320], [320, 1472],    # 471040  4.6:1
    [1536, 320], [320, 1536],    # 491520  4.8:1
]

MASSIVE_ASPECTS = [
    [704, 704],                  # 495616  1:1
    [768, 640], [640, 768],      # 491520  1.2:1
    [832, 576], [576, 832],      # 479232  1.444:1
    [896, 512], [512, 896],      # 458752  1.75:1
    [960, 512], [512, 960],      # 491520  1.875:1
    [1024, 448], [448, 1024],    # 458752  2.286:1
    [1088, 448], [448, 1088],    # 487424  2.429:1
    [1152, 384], [384, 1152],    # 442368  3:1
    [1216, 384], [384, 1216],    # 466944  3.167:1
    [1280, 384], [384, 1280],    # 491520  3.333:1
    [1280, 320], [320, 1280],    # 409600  4:1
    [1408, 320], [320, 1408],    # 450560  4.4:1
    [1536, 320], [320, 1536],    # 491520  4.8:1
]

HUGE_ASPECTS = [
    [640, 640],                  # 409600  1:1
    [704, 576], [576, 704],      # 405504  1.222:1
    [768, 512], [512, 768],      # 393216  1.5:1
    [896, 448], [448, 896],      # 401408  2:1
    [1024, 384], [384, 1024],    # 393216  2.667:1
    [1280, 320], [320, 1280],    # 409600  4:1
    [1408, 256], [256, 1408],    # 360448  5.5:1
    [1472, 256], [256, 1472],    # 376832  5.75:1
    [1536, 256], [256, 1536],    # 393216  6:1
    [1600, 256], [256, 1600],    # 409600  6.25:1
]

BIG_ASPECTS = [
    [576, 576],                  # 331776  1:1
    [640, 512], [512, 640],      # 327680  1.25:1
    [640, 448], [448, 640],      # 286720  1.429:1
    [704, 448], [448, 704],      # 315392  1.571:1
    [832, 384], [384, 832],      # 319488  2.167:1
    [1024, 320], [320, 1024],    # 327680  3.2:1
    [1280, 256], [256, 1280],    # 327680  5:1
]

ASPECTS = [
    [512, 512],                  # 262144  1:1
    [576, 448], [448, 576],      # 258048  1.286:1
    [640, 384], [384, 640],      # 245760  1.667:1
    [768, 320], [320, 768],      # 245760  2.4:1
    [832, 256], [256, 832],      # 212992  3.25:1
    [896, 256], [256, 896],      # 229376  3.5:1
    [960, 256], [256, 960],      # 245760  3.75:1
    [1024, 256], [256, 1024],    # 262144  4:1
]


def get_aspect_buckets(resolution):
    """Return the bucket table for the requested training resolution.

    resolution: requested square side length in pixels. It is rounded down
        to a multiple of 64 before the lookup, so e.g. 532 selects the 512
        table.

    Returns the list of [width, height] buckets whose first (square) entry
    has a side equal to the rounded resolution.

    Raises ValueError when resolution is below 512, or when no table exists
    for the rounded resolution. (Previously the second case returned None
    and only failed later, inside the caller.)
    """
    if resolution < 512:
        raise ValueError("Resolution must be at least 512")

    rounded_resolution = int(resolution / 64) * 64  # round down to nearest 64

    # each table is identified by the side length of its leading square bucket
    for sizes in __get_all_aspects():
        if sizes[0][0] == rounded_resolution:
            return sizes

    print(f" *** Could not find selected resolution: {rounded_resolution}, check your resolution in config YAML")
    raise ValueError(f"No aspect buckets defined for resolution {rounded_resolution}")


def __get_all_aspects():
    """Return every bucket table, ordered from smallest to largest resolution."""
    return [ASPECTS, BIG_ASPECTS, HUGE_ASPECTS, MASSIVE_ASPECTS, GOD_ASPECTS]

View File

@ -2,37 +2,7 @@ import os
from PIL import Image from PIL import Image
import random import random
from ldm.data.image_train_item import ImageTrainItem from ldm.data.image_train_item import ImageTrainItem
import ldm.data.aspects as aspects
HUGE_ASPECTS = [[640,640], # 409600 1:1
[704,576],[576,704], # 405504 1:1.25
[768,512],[512,768], # 393216 1:1.5
[896,448],[448,896], # 401408 1:2
[1024,384],[384,1024], # 393216 1:2.667
[1280,320],[320,1280], # 409600 1:4
[1408,256],[256,1408], # 360448 1:5.5
[1472,256],[256,1472], # 376832 1:5.75
[1536,256],[256,1536], # 393216 1:6
[1600,256],[256,1600], # 409600 1:6.25
]
BIG_ASPECTS = [[576,576], # 331776 1:1\
[640,512],[512,640], # 327680 1.25:1\
[704,448],[448,704], # 314928 1.5625:1
[832,384],[384,832], # 317440 2.1667:1\
[1024,320],[320,1024], # 327680 3.2:1\
[1280,256],[256,1280], # 327680 5:1\
]
ASPECTS = [[512,512], # 1 262144\
[576,448],[448,576], # 1.29 258048\
[640,384],[384,640], # 1.67 245760\
[704,384],[384,704], # 1.83 245760\
[768,320],[320,768], # 2.4 245760\
[832,256],[256,832], # 3.25 212992\
[896,256],[256,896], # 3.5 229376\
[960,256],[256,960], # 3.75 245760\
[1024,256],[256,1024], # 4 245760\
]
class DataLoaderMultiAspect(): class DataLoaderMultiAspect():
""" """
@ -42,12 +12,13 @@ class DataLoaderMultiAspect():
batch_size: number of images per batch batch_size: number of images per batch
flip_p: probability of flipping image horizontally (i.e. 0-0.5) flip_p: probability of flipping image horizontally (i.e. 0-0.5)
""" """
def __init__(self, data_root, seed=555, debug_level=0, batch_size=1, flip_p=0.0, big_mode=0): def __init__(self, data_root, seed=555, debug_level=0, batch_size=1, flip_p=0.0, resolution=512):
self.image_paths = [] self.image_paths = []
self.debug_level = debug_level self.debug_level = debug_level
self.flip_p = flip_p self.flip_p = flip_p
self.big_mode = big_mode
self.aspects = aspects.get_aspect_buckets(resolution)
print(f"* DLMA resolution {resolution}, buckets: {self.aspects}")
print(" Preloading images...") print(" Preloading images...")
self.__recurse_data_root(self=self, recurse_root=data_root) self.__recurse_data_root(self=self, recurse_root=data_root)
@ -57,11 +28,12 @@ class DataLoaderMultiAspect():
if debug_level > 0: print(f" * DLMA Example: {self.image_caption_pairs[0]} images") if debug_level > 0: print(f" * DLMA Example: {self.image_caption_pairs[0]} images")
def get_all_images(self): def get_all_images(self):
return self.image_caption_pairs return self.image_caption_pairs
@staticmethod @staticmethod
def __read_caption_from_file(self, file_path, fallback_caption): def __read_caption_from_file(file_path, fallback_caption):
caption = fallback_caption caption = fallback_caption
try: try:
with open(file_path, 'r') as caption_file: with open(file_path, 'r') as caption_file:
@ -91,15 +63,13 @@ class DataLoaderMultiAspect():
else: else:
caption = caption_from_filename caption = caption_from_filename
if debug_level > 1: print(f" * DLMA file: {pathname} with caption: {caption}") #if debug_level > 1: print(f" * DLMA file: {pathname} with caption: {caption}")
image = Image.open(pathname) image = Image.open(pathname)
width, height = image.size width, height = image.size
image_aspect = width / height image_aspect = width / height
aspects = [ASPECTS, BIG_ASPECTS, HUGE_ASPECTS][self.big_mode] target_wh = min(self.aspects, key=lambda aspects:abs(aspects[0]/aspects[1] - image_aspect))
target_wh = min(aspects, key=lambda x:abs(x[0]/x[1]-image_aspect))
image_train_item = ImageTrainItem(image=None, caption=caption, target_wh=target_wh, pathname=pathname, flip_p=flip_p) image_train_item = ImageTrainItem(image=None, caption=caption, target_wh=target_wh, pathname=pathname, flip_p=flip_p)
@ -129,7 +99,9 @@ class DataLoaderMultiAspect():
truncate_count = len(buckets[bucket]) % batch_size truncate_count = len(buckets[bucket]) % batch_size
current_bucket_size = len(buckets[bucket]) current_bucket_size = len(buckets[bucket])
buckets[bucket] = buckets[bucket][:current_bucket_size - truncate_count] buckets[bucket] = buckets[bucket][:current_bucket_size - truncate_count]
print(f" ** Bucket {bucket} with {current_bucket_size} will drop {truncate_count} images due to batch size {batch_size}") if debug_level > 0 else None
if debug_level > 0:
print(f" ** Bucket {bucket} with {current_bucket_size} will drop {truncate_count} images due to batch size {batch_size}")
# flatten the buckets # flatten the buckets
image_caption_pairs = [] image_caption_pairs = []

View File

@ -1,6 +1,5 @@
import numpy as np import numpy as np
from torch.utils.data import Dataset from torch.utils.data import Dataset
from torchvision import transforms
from ldm.data.data_loader import DataLoaderMultiAspect as dlma from ldm.data.data_loader import DataLoaderMultiAspect as dlma
import math import math
import ldm.data.dl_singleton as dls import ldm.data.dl_singleton as dls

View File

@ -10,10 +10,10 @@ class EveryDreamBatch(Dataset):
data_root: root path of all your training images, will be recursively searched for images data_root: root path of all your training images, will be recursively searched for images
repeats: how many times to repeat each image in the dataset repeats: how many times to repeat each image in the dataset
flip_p: probability of flipping the image horizontally flip_p: probability of flipping the image horizontally
debug_level: 0=none, 1=print drops due to unfilled batches on aspect ratio buckets, 2=save crops to disk for inspection debug_level: 0=none, 1=print drops due to unfilled batches on aspect ratio buckets, 2=debug info per image, 3=save crops to disk for inspection
batch_size: how many images to return in a batch batch_size: how many images to return in a batch
conditional_dropout: probability of dropping the caption for a given image conditional_dropout: probability of dropping the caption for a given image
big_mode: 0=normal, 1=big, 2=biggest resolution: max resolution (relative to square)
jitter: number of pixels to jitter the crop by, only for non-square images jitter: number of pixels to jitter the crop by, only for non-square images
""" """
def __init__(self, def __init__(self,
@ -24,18 +24,22 @@ class EveryDreamBatch(Dataset):
batch_size=1, batch_size=1,
set='train', set='train',
conditional_dropout=0.0, conditional_dropout=0.0,
big_mode=0, resolution=512,
crop_jitter=0, crop_jitter=0,
seed=555,
image_cache_size=200
): ):
self.data_root = data_root self.data_root = data_root
self.batch_size = batch_size self.batch_size = batch_size
self.debug_level = debug_level self.debug_level = debug_level
self.conditional_dropout = conditional_dropout self.conditional_dropout = conditional_dropout
self.crop_jitter = crop_jitter self.crop_jitter = crop_jitter
self.unloaded_to_idx = 0
self.image_cache_size = image_cache_size
if not dls.shared_dataloader: if not dls.shared_dataloader:
print(" * Creating new dataloader singleton") print(" * Creating new dataloader singleton")
dls.shared_dataloader = dlma(data_root=data_root, debug_level=debug_level, batch_size=self.batch_size, flip_p=flip_p, big_mode=big_mode) dls.shared_dataloader = dlma(data_root=data_root, seed=seed, debug_level=debug_level, batch_size=self.batch_size, flip_p=flip_p, resolution=resolution)
self.image_train_items = dls.shared_dataloader.get_all_images() self.image_train_items = dls.shared_dataloader.get_all_images()
@ -54,20 +58,35 @@ class EveryDreamBatch(Dataset):
idx = i % self.num_images idx = i % self.num_images
image_train_item = self.image_train_items[idx] image_train_item = self.image_train_items[idx]
example = self.__get_image_for_trainer(image_train_item, self.debug_level) example = self.__get_image_for_trainer(image_train_item, self.debug_level)
if self.unloaded_to_idx > idx:
self.unloaded_to_idx = 0
if idx % (self.batch_size*3) == 0 and idx > (self.batch_size * 5) and idx > self.image_cache_size:
start_del = max(self.image_cache_size, self.unloaded_to_idx)
self.unloaded_to_idx = int(idx / self.batch_size)*self.batch_size - self.batch_size*8
print(f"{idx}: {start_del}, {self.unloaded_to_idx}") if self.debug_level > 1 else None
if self.unloaded_to_idx > self.image_cache_size:
for j in range(start_del, self.unloaded_to_idx):
del self.image_train_items[j].image
if self.debug_level > 1: print(f" * Unloaded images from idx {start_del} to {self.unloaded_to_idx}")
return example return example
def __get_image_for_trainer(self, image_train_item: ImageTrainItem, debug_level=0): def __get_image_for_trainer(self, image_train_item: ImageTrainItem, debug_level=0):
example = {} example = {}
save = debug_level > 1 save = debug_level > 2
image_train_tmp = image_train_item.hydrate(crop=False, save=save, crop_jitter=self.crop_jitter) image_train_tmp = image_train_item.hydrate(crop=False, save=save, crop_jitter=self.crop_jitter)
example["image"] = image_train_tmp.image example["image"] = image_train_tmp.image
#if random.random() > self.conditional_dropout: if random.random() > self.conditional_dropout:
example["caption"] = image_train_tmp.caption example["caption"] = image_train_tmp.caption
#else: else:
# example["caption"] = " " example["caption"] = " "
return example return example

View File

@ -8,7 +8,11 @@ import os
class ImageTrainItem(): class ImageTrainItem():
""" """
# [image, identifier, target_aspect, closest_aspect_wh(w,h), pathname] image: PIL.Image
identifier: caption,
target_aspect: (width, height),
pathname: path to image file
flip_p: probability of flipping image (0.0 to 1.0)
""" """
def __init__(self, image: PIL.Image, caption: str, target_wh: list, pathname: str, flip_p=0.0): def __init__(self, image: PIL.Image, caption: str, target_wh: list, pathname: str, flip_p=0.0):
self.caption = caption self.caption = caption
@ -18,49 +22,62 @@ class ImageTrainItem():
self.cropped_img = None self.cropped_img = None
if image is None: if image is None:
self.image = PIL.Image.new(mode='RGB',size=(1,1)) self.image = []
else: else:
self.image = image self.image = image
def hydrate(self, crop=False, save=False, crop_jitter=0): def hydrate(self, crop=False, save=False, crop_jitter=0):
self.image = PIL.Image.open(self.pathname).convert('RGB') """
crop: hard center crop to 512x512
save: save the cropped image to disk, for manual inspection of resize/crop
crop_jitter: randomly shift cropp by N pixels when using multiple aspect ratios to improve training quality
"""
if not hasattr(self, 'image') or len(self.image) == 0:
self.image = PIL.Image.open(self.pathname).convert('RGB')
width, height = self.image.size width, height = self.image.size
if crop: if crop:
cropped_img = self.__autocrop(self.image) cropped_img = self.__autocrop(self.image)
self.image = cropped_img.resize((512,512), resample=PIL.Image.BICUBIC) self.image = cropped_img.resize((512,512), resample=PIL.Image.BICUBIC)
else:
if width == 512 and height == 512:
pass
elif self.target_wh[0] == self.target_wh[1]:
pass
else: else:
width, height = self.image.size if self.target_wh[0] == self.target_wh[1]:
image_aspect = width / height pass
jitter_amount = random.randint(-crop_jitter, crop_jitter)
jitter_amount = min(jitter_amount, int(abs(width-height)/2))
target_aspect = self.target_wh[0] / self.target_wh[1]
if image_aspect > target_aspect:
new_width = int(height * target_aspect)
left = int((width - new_width) / 2) + jitter_amount
right = left + new_width
self.image = self.image.crop((left, 0, right, height))
else: else:
new_height = int(width / target_aspect) width, height = self.image.size
top = int((height - new_height) / 2) + jitter_amount image_aspect = width / height
bottom = top + new_height jitter_amount = random.randint(0, crop_jitter)
self.image = self.image.crop((0, top, width, bottom)) target_aspect = self.target_wh[0] / self.target_wh[1]
self.image = self.image.resize(self.target_wh, resample=PIL.Image.BICUBIC) print(f"{target_aspect}, {self.target_wh}")
if image_aspect > target_aspect:
new_width = int(height * target_aspect)
jitter_amount = max(min(jitter_amount, int(abs(width-new_width)/2)), 0)
left = jitter_amount
right = left + new_width
print(f"crop left: {left}, right: {right}, jitteramt:{jitter_amount}, [{width}, {height}] img: {self.pathname}")
self.image = self.image.crop((left, 0, right, height))
else:
new_height = int(width / target_aspect)
jitter_amount = max(min(jitter_amount, int(abs(height-new_height)/2)), 0)
top = jitter_amount
bottom = top + new_height
print(f"crop top: {top}, bottom: {bottom}, jitteramt:{jitter_amount}, [{width}, {height}] img: {self.pathname}")
self.image = self.image.crop((0, top, width, bottom))
self.image = self.image.resize(self.target_wh, resample=PIL.Image.BICUBIC)
self.image = self.flip(self.image) self.image = self.flip(self.image)
if save: # for manual inspection if type(self.image) is not np.ndarray:
base_name = os.path.basename(self.pathname) if save:
self.image.save(f"test/output/{random.randint(0,4)}/{base_name}") base_name = os.path.basename(self.pathname)
if not os.path.exists("test/output"):
os.makedirs("test/output")
self.image.save(f"test/output/{base_name}")
self.image = np.array(self.image).astype(np.uint8) self.image = np.array(self.image).astype(np.uint8)
self.image = (self.image / 127.5 - 1.0).astype(np.float32) self.image = (self.image / 127.5 - 1.0).astype(np.float32)
print(self.image.shape)
return self return self

View File

@ -96,3 +96,35 @@ class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2):
self.last_f = f self.last_f = f
return f return f
class EveryDreamScheduler:
    """
    Step-indexed learning-rate multiplier with a flat warm-up and a linear decay.

    f_start: multiplier returned while the step is below warm_up_steps
    f_max: upper end of the linear decay
    f_min: lower end of the linear decay, also returned for every step at or past steps_to_min
    warm_up_steps: number of steps for which f_start is returned
    steps_to_min: step at which the multiplier reaches f_min
    verbosity_interval: print a status line every N steps, 0 disables printing
    """
    def __init__(self, f_min=0.5, f_max=1.0, f_start=1.0, warm_up_steps=1000, steps_to_min=5000, verbosity_interval=100) -> None:
        self.f_start, self.f_max, self.f_min = f_start, f_max, f_min
        self.warm_up_steps, self.steps_to_min = warm_up_steps, steps_to_min
        self.verbosity_interval = verbosity_interval
        self.last_f = 0.  # most recently returned multiplier

    def __call__(self, n, **kwargs):
        # calling the instance is shorthand for schedule()
        return self.schedule(n, **kwargs)

    def schedule(self, n, **kawrgs):
        """Return the multiplier for step n and remember it in last_f."""
        should_report = self.verbosity_interval > 0 and n % self.verbosity_interval == 0
        if should_report:
            # last_f still holds the value from the previous call at this point
            print(f"current step: {n}, recent lr-multiplier: {self.last_f:0.3f}, current cycle: {0}")

        if n < self.warm_up_steps:
            multiplier = self.f_start
        elif n < self.steps_to_min:
            multiplier = self.f_min + (self.f_max - self.f_min) * (self.steps_to_min - n) / (self.steps_to_min)
        else:
            multiplier = self.f_min

        self.last_f = multiplier
        return multiplier

View File

@ -453,6 +453,7 @@ class LatentDiffusion(DDPM):
conditioning_key=None, conditioning_key=None,
scale_factor=1.0, scale_factor=1.0,
scale_by_std=False, scale_by_std=False,
scheduler_config=None,
*args, **kwargs): *args, **kwargs):
self.num_timesteps_cond = default(num_timesteps_cond, 1) self.num_timesteps_cond = default(num_timesteps_cond, 1)
@ -465,7 +466,7 @@ class LatentDiffusion(DDPM):
conditioning_key = None conditioning_key = None
ckpt_path = kwargs.pop("ckpt_path", None) ckpt_path = kwargs.pop("ckpt_path", None)
ignore_keys = kwargs.pop("ignore_keys", []) ignore_keys = kwargs.pop("ignore_keys", [])
super().__init__(conditioning_key=conditioning_key, *args, **kwargs) super().__init__(conditioning_key=conditioning_key, scheduler_config=scheduler_config, *args, **kwargs)
self.concat_mode = concat_mode self.concat_mode = concat_mode
self.cond_stage_trainable = cond_stage_trainable self.cond_stage_trainable = cond_stage_trainable
self.cond_stage_key = cond_stage_key self.cond_stage_key = cond_stage_key
@ -704,8 +705,6 @@ class LatentDiffusion(DDPM):
if cond_key != self.first_stage_key: if cond_key != self.first_stage_key:
if cond_key in ['caption', 'coordinates_bbox']: if cond_key in ['caption', 'coordinates_bbox']:
xc = batch[cond_key] xc = batch[cond_key]
elif cond_key == 'class_label':
xc = batch
else: else:
xc = super().get_input(batch, cond_key).to(self.device) xc = super().get_input(batch, cond_key).to(self.device)
else: else:

15
test/test_aspects.py Normal file
View File

@ -0,0 +1,15 @@
import ldm.data.aspects as aspects
# Sanity check for the bucket tables: within each resolution class no bucket
# may contain more pixels than the leading square bucket.
resolutions = [512, 576, 640, 704, 768]
oops = [532, 576, 640, 704, 768]  # NOTE(review): defined but never exercised below

for res in resolutions:
    buckets = aspects.get_aspect_buckets(res)
    print(f" *{res} buckets: {buckets}")

    # the first bucket is the square one and sets the pixel budget
    square = buckets[0]
    max_pixels = square[0] * square[1]

    for aspect in buckets:
        pixels = aspect[0] * aspect[1]
        print (f"max: {max_pixels}: {aspect}: {pixels}, pct {pixels/max_pixels:.2f}")
        assert pixels <= max_pixels, f" * {aspect} is larger than {max_pixels}"

36
test/test_batch.py Normal file
View File

@ -0,0 +1,36 @@
# script to test data loader by itself
# run from training root, edit the data_root manually
from ldm.data.every_dream import EveryDreamBatch
import time
s = time.perf_counter()
#data_root = "r:/everydream-trainer/test/input"
data_root = "r:/everydream-trainer/training_samples"
batch_size = 6
repeats=3
every_dream_batch = EveryDreamBatch(data_root=data_root, flip_p=0.0, debug_level=2, batch_size=batch_size, repeats=repeats, crop_jitter=25, conditional_dropout=0.3, resolution=512)
print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
print(f" max test cycles: {int(len(every_dream_batch) / batch_size)}, batch_size: {batch_size}, repeats: {repeats}")
i = 0
while i < 99: # and i < len(every_dream_batch):
curr_batch = []
for j in range(i,i+batch_size):
curr_batch.append(every_dream_batch[j])
# all in batch must have the same image size
assert all(x == curr_batch[0]['image'].shape for x in [e['image'].shape for e in curr_batch])
assert all(x[0] > 2 for x in [e['image'].shape for e in curr_batch])
#print(f"idx: {i}, batch sample: shape: {curr_batch[0]['image'].shape}: {curr_batch[0]['caption']}")
i += batch_size
print(f" *TEST* test cycles: {i}")
print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
elapsed = time.perf_counter() - s
print(f"{__file__} executed in {elapsed:5.2f} seconds.")

48
test/test_crop.py Normal file
View File

@ -0,0 +1,48 @@
# script to test what cropping does to your images
# execute from root everydream-trainer folder
# ex.
# (everydream) R:\everydream-trainer>python test/test_crop.py
# dumps to /test/output
from ldm.data.every_dream import EveryDreamBatch
import time
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--data_root', type=str, default=None, help='root path of all your training images, will be recursively searched for images')
parser.add_argument('--resolution', type=int, default=512, help='resolution class, 512, 576, 640, 704, or 768')
args = parser.parse_args()
s = time.perf_counter()
# put in your own data_root here, WARNING don't do this on a lot of images unless you are prepared for it...
if args.data_root is None:
data_root = "R:/everydream-trainer/test/input"
else:
data_root = args.data_root
debug_level = 3 # 3 = dump images to disk after cropping and a bunch of crap into the console be warned
batch_size = 1
repeats = 1
crop_jitter = 50
resolution = args.resolution # 512, 576, 640, 704, 768
every_dream_batch = EveryDreamBatch(data_root=data_root, flip_p=0.0, debug_level=3, batch_size=batch_size, repeats=repeats, crop_jitter=crop_jitter, conditional_dropout=0.1, resolution=resolution)
print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
print(f" max test cycles: {int(len(every_dream_batch) / batch_size)}, batch_size: {batch_size}, repeats: {repeats}")
i = 0
while i < len(every_dream_batch):
curr_batch = []
for j in range(i,i+batch_size):
curr_batch.append(every_dream_batch[j])
assert all(x == curr_batch[0]['image'].shape for x in [e['image'].shape for e in curr_batch])
assert all(x[0] > 2 for x in [e['image'].shape for e in curr_batch])
i += batch_size
print(f" *TEST* test cycles: {i}")
print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
elapsed = time.perf_counter() - s
print(f"{__file__} executed in {elapsed:5.2f} seconds.")

18
test/test_dl.py Normal file
View File

@ -0,0 +1,18 @@
# script to test data loader by itself
# run from training root, edit the data_root manually
# python test/test_dl.py
import ldm.data.data_loader as dl
data_root = "r:/everydream-trainer/test/input"
data_loader = dl.DataLoaderMultiAspect(data_root=data_root, batch_size=2, seed=555, debug_level=2)
image_caption_pairs = data_loader.get_all_images()
print(f"Loaded {len(image_caption_pairs)} image-caption pairs")
for image_caption_pair in image_caption_pairs:
print(image_caption_pair)
print(f"**** Done loading. Loaded {len(image_caption_pairs)} images from data_root: {data_root} ****")

View File

@ -0,0 +1,24 @@
# script to test ImageTrainItem hydrate/crop by itself
# run from training root, edit the data_root manually
# python ldm/data/test_dl.py
from ldm.data.image_train_item import ImageTrainItem
import glob
import os
data_root = "training_samples\multiaspect"
for idx, f in enumerate(glob.iglob(f"{data_root}/*.jpg")):
for i in range(0, 40):
#print(f)
#image: PIL.Image, caption: str, target_wh: list, pathname: str, flip_p=0.0):
caption = os.path.basename(f)
caption = os.path.splitext(caption)[0]
my_iti = ImageTrainItem(None,caption,[512,512],f,0.0)
my_iti = my_iti.hydrate()
out_file_path = os.path.join(data_root, "output", f"{caption}_{i}.jpg")
#print(out_file_path)
my_iti.cropped_img.save(out_file_path)

7
test/test_linearsch.py Normal file
View File

@ -0,0 +1,7 @@
import ldm.lr_scheduler as lrs
# EveryDreamScheduler(f_min, f_max, f_start, warm_up_steps, steps_to_min, verbosity_interval)
# Print the LR multiplier for the first 50 steps of a short schedule
sch = lrs.EveryDreamScheduler(
    f_start=1.0,
    f_max=1.0,
    f_min=5.0e-1,
    warm_up_steps=10,
    steps_to_min=25,
    verbosity_interval=5,
)

for step in range(50):
    multiplier = sch(step)
    print(f"step {step}: {multiplier}")