unload old images for better sys ram use, fix up crop jitter
parent d26fabac14 → commit b47c5c0ed3
@@ -65,7 +65,7 @@ model:
 data:
   target: main.DataModuleFromConfig
   params:
-    batch_size: 6 # prefer highest possible without getting CUDA Out of Memory error
+    batch_size: 6 # prefer highest possible without getting CUDA Out of Memory error, A100 40GB ~= 20, 80GB ~= 48
     num_workers: 6
     wrap: falsegit
     train:
@@ -73,9 +73,10 @@ data:
       params:
         repeats: 5 # rough suggestions: 5 with 5000+ images, 15 for 1000 images, use micro yaml for <100
         debug_level: 1 # 1 to print if images are dropped due to multiple-aspect ratio image batching
-        conditional_dropout: 0.01 # experimental, likelihood to drop the caption, may help with poorly captioned images
+        conditional_dropout: 0.08 # experimental, likelihood to drop the caption, may help with poorly captioned images
         crop_jitter: 5 # adds N pixels of jitter to cropping algorithm for non-square images only
-        big_mode: 0 # set to 1 or 2 to use larger image sizes for training, USES LOTS OF VRAM! Requires 40GB+
+        resolution: 512 # 512, 576, or 640, increases VRAM substantially
+        seed: 555 # seed used to shuffle the dataset ordering, keep constant for reproducibility
     validation:
       target: ldm.data.ed_validate.EDValidateBatch
       params:
@@ -0,0 +1,109 @@
model:
  base_learning_rate: 1.2e-6
  target: ldm.models.diffusion.ddpm.LatentDiffusion
  params:
    linear_start: 0.00085
    linear_end: 0.0120
    num_timesteps_cond: 1
    log_every_t: 300
    timesteps: 1000
    first_stage_key: image
    cond_stage_key: caption
    image_size: 64
    channels: 4
    cond_stage_trainable: true
    conditioning_key: crossattn
    monitor: val/loss_simple_ema
    scale_factor: 0.18215
    use_ema: False
    unfreeze_model: True
    model_lr: 1.2e-6

    unet_config:
      target: ldm.modules.diffusionmodules.openaimodel.UNetModel
      params:
        image_size: 32 # unused
        in_channels: 4
        out_channels: 4
        model_channels: 320
        attention_resolutions: [ 4, 2, 1 ]
        num_res_blocks: 2
        channel_mult: [ 1, 2, 4, 4 ]
        num_heads: 8
        use_spatial_transformer: True
        transformer_depth: 1
        context_dim: 768
        use_checkpoint: True
        legacy: False

    first_stage_config:
      target: ldm.models.autoencoder.AutoencoderKL
      params:
        embed_dim: 4
        monitor: val/rec_loss
        ddconfig:
          double_z: true
          z_channels: 4
          resolution: 512
          in_channels: 3
          out_ch: 3
          ch: 128
          ch_mult:
          - 1
          - 2
          - 4
          - 4
          num_res_blocks: 2
          attn_resolutions: []
          dropout: 0.0
        lossconfig:
          target: torch.nn.Identity

    cond_stage_config:
      target: ldm.modules.encoders.modules.FrozenCLIPEmbedder

data:
  target: main.DataModuleFromConfig
  params:
    batch_size: 6 # prefer highest possible without getting CUDA Out of Memory error, A100 40GB ~= 20, 80GB ~= 48
    num_workers: 6
    wrap: falsegit
    train:
      target: ldm.data.every_dream.EveryDreamBatch
      params:
        repeats: 1 # rough suggestions: 5 with 5000+ images, 15 for 1000 images, use micro yaml for <100
        debug_level: 1 # 1 to print if images are dropped due to multiple-aspect ratio image batching
        conditional_dropout: 0.08 # experimental, likelihood to drop the caption, may help with poorly captioned images
        crop_jitter: 15 # adds N pixels of jitter to cropping algorithm for non-square images only
        big_mode: 0 # set to 1 or 2 to use larger image sizes for training, USES LOTS OF VRAM! Requires 40GB+
    validation:
      target: ldm.data.ed_validate.EDValidateBatch
      params:
        repeats: 0.3
    test:
      target: ldm.data.ed_validate.EDValidateBatch
      params:
        repeats: 0.2

lightning:
  modelcheckpoint:
    params:
      every_n_epochs: 1 # produce a ckpt every epoch, leave 1!
      #every_n_train_steps: 1400 # can only use epoch or train step checkpoints
      save_top_k: 6 # save the best N ckpts according to loss, can reduce to save disk space but suggest at LEAST 2, more if you have max_epochs below higher!
      save_last: True
      filename: "{epoch:02d}-{step:05d}"
  callbacks:
    image_logger:
      target: main.ImageLogger
      params:
        batch_frequency: 500
        max_images: 16
        increase_log_steps: False

  trainer:
    benchmark: True
    max_epochs: 1 # better to run several epochs and test your checkpoints! Try 4-5, you get a checkpoint every epoch to test!
    max_steps: 99000 # better to end on epochs not steps, especially with >500 images to ensure even distribution, but you can set this if you really want...
    check_val_every_n_epoch: 1
    gpus: 0,
@@ -1,5 +1,5 @@
 model:
-  base_learning_rate: 1.0e-6
+  base_learning_rate: 1.2e-6
   target: ldm.models.diffusion.ddpm.LatentDiffusion
   params:
     linear_start: 0.00085
@@ -17,16 +17,17 @@ model:
     scale_factor: 0.18215
     use_ema: False
     unfreeze_model: True
-    #model_lr: 1.0e-6
+    model_lr: 1.1e-6
+    #use_scheduler: True
     scheduler_config:
-      target: ldm.lr_scheduler.LambdaLinearScheduler
+      target: ldm.lr_scheduler.EveryDreamScheduler
       params:
-        warm_up_steps: [ 5 ]
-        cycle_lengths: [ 1000 ] # incredibly large number to prevent corner cases
-        verbosity_interval: 25 # how often to print LR updates
-        f_start: [ 1.e-6 ]
-        f_max: [ 1.e-6 ] # 1.
-        f_min: [ 1.e-8 ] # 1.
+        f_start: 5.0e-1 # starting LR multiplier
+        warm_up_steps: 50 # number of steps to warm up to f_start before decaying LR
+        f_max: 1.0 # maximum LR multiplier
+        f_min: 5.0e-1 # minimum LR multiplier
+        steps_to_min: 10000 # number of steps to decay from f_max to f_min
+        verbosity_interval: 200 # how often to print LR multiplier (steps)

     unet_config:
       target: ldm.modules.diffusionmodules.openaimodel.UNetModel
@@ -80,13 +81,16 @@ data:
     train:
       target: ldm.data.every_dream.EveryDreamBatch
       params:
-        repeats: 5
-        flip_p: 0
-        debug_level: 1
+        repeats: 1 # rough suggestions: 5 with 5000+ images, 15 for 1000 images, use micro yaml for <100
+        debug_level: 1 # 1 to print if images are dropped due to multiple-aspect ratio image batching
+        conditional_dropout: 0.08 # experimental, likelihood to drop the caption, may help with poorly captioned images
+        crop_jitter: 20 # adds N pixels of jitter to cropping algorithm for non-square images only
+        resolution: 512 # defines max pixels for all aspects, 512, 576, 640, 704, or 768
+        seed: 555 # seed used to shuffle the dataset, keep constant for reproducibility
     validation:
       target: ldm.data.ed_validate.EDValidateBatch
       params:
-        repeats: 0.5
+        repeats: 0.25
     test:
       target: ldm.data.ed_validate.EDValidateBatch
       params:
@@ -96,21 +100,21 @@ lightning:
   modelcheckpoint:
     params:
       every_n_epochs: 1
-      #every_n_train_steps: 1400 # can only use every_n_epochs OR every_n_train_steps, suggest you stick with epochs
+      #every_n_train_steps: 1500 # can only use every_n_epochs OR every_n_train_steps, suggest you stick with epochs
       save_last: True
-      save_top_k: 5
+      save_top_k: 99
       filename: "{epoch:02d}-{step:05d}"
   callbacks:
     image_logger:
       target: main.ImageLogger
       params:
-        batch_frequency: 100
+        batch_frequency: 400
         max_images: 16
         increase_log_steps: False

   trainer:
     benchmark: True
-    max_epochs: 4
+    max_epochs: 5
     max_steps: 99000 # better to end on epochs not steps, especially with >500 images to ensure even distribution, but you can set this if you really want...
     check_val_every_n_epoch: 1
     gpus: 0,
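A quick sanity check of what the scheduler values above produce (this is a standalone sketch, not trainer code; it simply mirrors the piecewise EveryDreamScheduler.schedule() added to ldm.lr_scheduler later in this commit, using the config values shown): the multiplier holds at f_start through warm_up_steps, then decays linearly from roughly f_max down to f_min by steps_to_min, and stays there. The multiplier is applied as a factor on the learning rate once the scheduler is actually enabled (note #use_scheduler is still commented out above).

# Standalone sketch: mirrors EveryDreamScheduler.schedule() with the values above.
f_start, warm_up_steps = 5.0e-1, 50
f_max, f_min, steps_to_min = 1.0, 5.0e-1, 10000

def lr_multiplier(n):
    if n < warm_up_steps:
        return f_start                                    # hold during warm-up
    elif n < steps_to_min:
        # linear decay from ~f_max back down to f_min
        return f_min + (f_max - f_min) * (steps_to_min - n) / steps_to_min
    return f_min

for step in (0, 49, 50, 5000, 10000, 20000):
    print(step, lr_multiplier(step))
# roughly: 0.5 during warm-up, ~1.0 right after, 0.75 at step 5000, 0.5 from step 10000 on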
(new binary file added, not shown: image, 196 KiB)
@@ -0,0 +1,76 @@
GOD_ASPECTS = [[768,768],     # 589824 1:1
    [832,704],[704,832],      # 585728 1.181:1
    [896,640],[640,896],      # 573440 1.4:1
    [960,576],[576,960],      # 552960 1.6:1
    [1024,576],[576,1024],    # 524288 1.778:1
    [1088,512],[512,1088],    # 497664 2.125:1
    [1152,512],[512,1152],    # 589824 2.25:1
    [1216,448],[448,1216],    # 552960 2.714:1
    [1280,448],[448,1280],    # 573440 2.857:1
    [1344,384],[384,1344],    # 518400 3.5:1
    [1408,384],[384,1408],    # 540672 3.667:1
    [1472,320],[320,1472],    # 470400 4.6:1
    [1536,320],[320,1536],    # 491520 4.8:1
]

MASSIVE_ASPECTS = [[704,704], # 501,376 1:1
    [768,640],[640,768],      # 491,520 1.2:1
    [832,576],[576,832],      # 458,752 1.444:1
    [896,512],[512,896],      # 458,752 1.75:1
    [960,512],[512,960],      # 491,520 1.875:1
    [1024,448],[448,1024],    # 458,752 2.286:1
    [1088,448],[448,1088],    # 487,424 2.429:1
    [1152,384],[384,1152],    # 442,368 3:1
    [1216,384],[384,1216],    # 466,944 3.125:1
    [1280,384],[384,1280],    # 491,520 3.333:1
    [1280,320],[320,1280],    # 409,600 4:1
    [1408,320],[320,1408],    # 450,560 4.4:1
    [1536,320],[320,1536],    # 491,520 4.8:1
]

HUGE_ASPECTS = [[640,640],    # 409600 1:1
    [704,576],[576,704],      # 405504 1.25:1
    [768,512],[512,768],      # 393216 1.5:1
    [896,448],[448,896],      # 401408 2:1
    [1024,384],[384,1024],    # 393216 2.667:1
    [1280,320],[320,1280],    # 409600 4:1
    [1408,256],[256,1408],    # 360448 5.5:1
    [1472,256],[256,1472],    # 376832 5.75:1
    [1536,256],[256,1536],    # 393216 6:1
    [1600,256],[256,1600],    # 409600 6.25:1
]

BIG_ASPECTS = [[576,576],     # 331776 1:1
    [640,512],[512,640],      # 327680 1.25:1
    [640,448],[448,640],      # 286720 1.4286:1
    [704,448],[448,704],      # 314928 1.5625:1
    [832,384],[384,832],      # 317440 2.1667:1
    [1024,320],[320,1024],    # 327680 3.2:1
    [1280,256],[256,1280],    # 327680 5:1
]

ASPECTS = [[512,512],         # 262144 1:1
    [576,448],[448,576],      # 258048 1.29:1
    [640,384],[384,640],      # 245760 1.667:1
    [768,320],[320,768],      # 245760 2.4:1
    [832,256],[256,832],      # 212992 3.25:1
    [896,256],[256,896],      # 229376 3.5:1
    [960,256],[256,960],      # 245760 3.75:1
    [1024,256],[256,1024],    # 245760 4:1
]

def get_aspect_buckets(resolution):
    if resolution < 512:
        raise ValueError("Resolution must be at least 512")
    try:
        rounded_resolution = int(resolution / 64) * 64 # round down to nearest 64
        all_image_sizes = __get_all_aspects()
        aspects = next(filter(lambda sizes: sizes[0][0]==rounded_resolution, all_image_sizes), None) # find matching set of aspect ratios
        return aspects
    except Exception as e:
        print(f" *** Could not find selected resolution: {rounded_resolution}, check your resolution in config YAML")
        raise e

def __get_all_aspects():
    return [ASPECTS, BIG_ASPECTS, HUGE_ASPECTS, MASSIVE_ASPECTS, GOD_ASPECTS]
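To show how these tables are meant to be consumed (a minimal self-contained sketch, not part of the commit): get_aspect_buckets() returns the table whose square bucket matches the resolution rounded down to a multiple of 64, and the data loader then assigns each image the bucket whose aspect ratio is closest to its own, using the same min(...) expression that appears in DataLoaderMultiAspect below. The image size here is invented for illustration; the bucket list is the 512 ASPECTS table verbatim.

# Sketch of bucket assignment; ASPECTS copied from the 512 table above.
ASPECTS = [[512,512],
           [576,448],[448,576],
           [640,384],[384,640],
           [768,320],[320,768],
           [832,256],[256,832],
           [896,256],[256,896],
           [960,256],[256,960],
           [1024,256],[256,1024]]

width, height = 1000, 700                 # hypothetical training image
image_aspect = width / height             # ~1.43
target_wh = min(ASPECTS, key=lambda wh: abs(wh[0]/wh[1] - image_aspect))
print(target_wh)                          # [576, 448] -- aspect 1.29 is the closest match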
@@ -2,37 +2,7 @@ import os
 from PIL import Image
 import random
 from ldm.data.image_train_item import ImageTrainItem
+import ldm.data.aspects as aspects
-HUGE_ASPECTS = [[640,640],    # 409600 1:1
-    [704,576],[576,704],      # 405504 1:1.25
-    [768,512],[512,768],      # 393216 1:1.5
-    [896,448],[448,896],      # 401408 1:2
-    [1024,384],[384,1024],    # 393216 1:2.667
-    [1280,320],[320,1280],    # 409600 1:4
-    [1408,256],[256,1408],    # 360448 1:5.5
-    [1472,256],[256,1472],    # 376832 1:5.75
-    [1536,256],[256,1536],    # 393216 1:6
-    [1600,256],[256,1600],    # 409600 1:6.25
-]
-
-BIG_ASPECTS = [[576,576],     # 331776 1:1
-    [640,512],[512,640],      # 327680 1.25:1
-    [704,448],[448,704],      # 314928 1.5625:1
-    [832,384],[384,832],      # 317440 2.1667:1
-    [1024,320],[320,1024],    # 327680 3.2:1
-    [1280,256],[256,1280],    # 327680 5:1
-]
-
-ASPECTS = [[512,512],         # 1 262144
-    [576,448],[448,576],      # 1.29 258048
-    [640,384],[384,640],      # 1.67 245760
-    [704,384],[384,704],      # 1.83 245760
-    [768,320],[320,768],      # 2.4 245760
-    [832,256],[256,832],      # 3.25 212992
-    [896,256],[256,896],      # 3.5 229376
-    [960,256],[256,960],      # 3.75 245760
-    [1024,256],[256,1024],    # 4 245760
-]
-
 class DataLoaderMultiAspect():
     """
@@ -42,12 +12,13 @@ class DataLoaderMultiAspect():
     batch_size: number of images per batch
     flip_p: probability of flipping image horizontally (i.e. 0-0.5)
     """
-    def __init__(self, data_root, seed=555, debug_level=0, batch_size=1, flip_p=0.0, big_mode=0):
+    def __init__(self, data_root, seed=555, debug_level=0, batch_size=1, flip_p=0.0, resolution=512):
         self.image_paths = []
         self.debug_level = debug_level
         self.flip_p = flip_p
-        self.big_mode = big_mode

+        self.aspects = aspects.get_aspect_buckets(resolution)
+        print(f"* DLMA resolution {resolution}, buckets: {self.aspects}")
         print(" Preloading images...")

         self.__recurse_data_root(self=self, recurse_root=data_root)
@@ -57,11 +28,12 @@ class DataLoaderMultiAspect():

         if debug_level > 0: print(f" * DLMA Example: {self.image_caption_pairs[0]} images")

     def get_all_images(self):
         return self.image_caption_pairs

     @staticmethod
-    def __read_caption_from_file(self, file_path, fallback_caption):
+    def __read_caption_from_file(file_path, fallback_caption):
         caption = fallback_caption
         try:
             with open(file_path, 'r') as caption_file:
@@ -91,15 +63,13 @@ class DataLoaderMultiAspect():
         else:
             caption = caption_from_filename

-        if debug_level > 1: print(f" * DLMA file: {pathname} with caption: {caption}")
+        #if debug_level > 1: print(f" * DLMA file: {pathname} with caption: {caption}")

         image = Image.open(pathname)
         width, height = image.size
         image_aspect = width / height

-        aspects = [ASPECTS, BIG_ASPECTS, HUGE_ASPECTS][self.big_mode]
-
-        target_wh = min(aspects, key=lambda x:abs(x[0]/x[1]-image_aspect))
+        target_wh = min(self.aspects, key=lambda aspects:abs(aspects[0]/aspects[1] - image_aspect))

         image_train_item = ImageTrainItem(image=None, caption=caption, target_wh=target_wh, pathname=pathname, flip_p=flip_p)
@@ -129,7 +99,9 @@ class DataLoaderMultiAspect():
                 truncate_count = len(buckets[bucket]) % batch_size
                 current_bucket_size = len(buckets[bucket])
                 buckets[bucket] = buckets[bucket][:current_bucket_size - truncate_count]
-                print(f" ** Bucket {bucket} with {current_bucket_size} will drop {truncate_count} images due to batch size {batch_size}") if debug_level > 0 else None
+                if debug_level > 0:
+                    print(f" ** Bucket {bucket} with {current_bucket_size} will drop {truncate_count} images due to batch size {batch_size}")

         # flatten the buckets
         image_caption_pairs = []
@@ -1,6 +1,5 @@
 import numpy as np
 from torch.utils.data import Dataset
-from torchvision import transforms
 from ldm.data.data_loader import DataLoaderMultiAspect as dlma
 import math
 import ldm.data.dl_singleton as dls
@@ -10,10 +10,10 @@ class EveryDreamBatch(Dataset):
     data_root: root path of all your training images, will be recursively searched for images
     repeats: how many times to repeat each image in the dataset
     flip_p: probability of flipping the image horizontally
-    debug_level: 0=none, 1=print drops due to unfilled batches on aspect ratio buckets, 2=save crops to disk for inspection
+    debug_level: 0=none, 1=print drops due to unfilled batches on aspect ratio buckets, 2=debug info per image, 3=save crops to disk for inspection
     batch_size: how many images to return in a batch
     conditional_dropout: probability of dropping the caption for a given image
-    big_mode: 0=normal, 1=big, 2=biggest
+    resolution: max resolution (relative to square)
     jitter: number of pixels to jitter the crop by, only for non-square images
     """
     def __init__(self,
@@ -24,18 +24,22 @@ class EveryDreamBatch(Dataset):
                  batch_size=1,
                  set='train',
                  conditional_dropout=0.0,
-                 big_mode=0,
+                 resolution=512,
                  crop_jitter=0,
+                 seed=555,
+                 image_cache_size=200
                  ):
         self.data_root = data_root
         self.batch_size = batch_size
         self.debug_level = debug_level
         self.conditional_dropout = conditional_dropout
         self.crop_jitter = crop_jitter
+        self.unloaded_to_idx = 0
+        self.image_cache_size = image_cache_size

         if not dls.shared_dataloader:
             print(" * Creating new dataloader singleton")
-            dls.shared_dataloader = dlma(data_root=data_root, debug_level=debug_level, batch_size=self.batch_size, flip_p=flip_p, big_mode=big_mode)
+            dls.shared_dataloader = dlma(data_root=data_root, seed=seed, debug_level=debug_level, batch_size=self.batch_size, flip_p=flip_p, resolution=resolution)

         self.image_train_items = dls.shared_dataloader.get_all_images()

@@ -54,20 +58,35 @@ class EveryDreamBatch(Dataset):
         idx = i % self.num_images
         image_train_item = self.image_train_items[idx]
         example = self.__get_image_for_trainer(image_train_item, self.debug_level)

+        if self.unloaded_to_idx > idx:
+            self.unloaded_to_idx = 0
+
+        if idx % (self.batch_size*3) == 0 and idx > (self.batch_size * 5) and idx > self.image_cache_size:
+            start_del = max(self.image_cache_size, self.unloaded_to_idx)
+            self.unloaded_to_idx = int(idx / self.batch_size)*self.batch_size - self.batch_size*8
+
+            print(f"{idx}: {start_del}, {self.unloaded_to_idx}") if self.debug_level > 1 else None
+
+            if self.unloaded_to_idx > self.image_cache_size:
+                for j in range(start_del, self.unloaded_to_idx):
+                    del self.image_train_items[j].image
+                if self.debug_level > 1: print(f" * Unloaded images from idx {start_del} to {self.unloaded_to_idx}")
+
         return example

     def __get_image_for_trainer(self, image_train_item: ImageTrainItem, debug_level=0):
         example = {}

-        save = debug_level > 1
+        save = debug_level > 2

         image_train_tmp = image_train_item.hydrate(crop=False, save=save, crop_jitter=self.crop_jitter)

         example["image"] = image_train_tmp.image

-        #if random.random() > self.conditional_dropout:
-        example["caption"] = image_train_tmp.caption
-        #else:
-        #    example["caption"] = " "
+        if random.random() > self.conditional_dropout:
+            example["caption"] = image_train_tmp.caption
+        else:
+            example["caption"] = " "

         return example
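A rough trace of the unload logic added above, using the defaults from this commit (batch_size=6, image_cache_size=200): the first image_cache_size hydrated images stay resident, and once idx passes the cache size the loop periodically drops PIL images that are more than eight batches behind the current index, which is what frees system RAM on large datasets. This is only a standalone walkthrough of the condition, not trainer code:

# Standalone trace of the eviction window in __getitem__ (same arithmetic,
# defaults assumed: batch_size=6, image_cache_size=200).
batch_size, image_cache_size = 6, 200
unloaded_to_idx = 0

for idx in range(300):
    if idx % (batch_size * 3) == 0 and idx > batch_size * 5 and idx > image_cache_size:
        start_del = max(image_cache_size, unloaded_to_idx)
        unloaded_to_idx = int(idx / batch_size) * batch_size - batch_size * 8
        if unloaded_to_idx > image_cache_size:
            print(f"idx {idx}: unload cached images {start_del}..{unloaded_to_idx - 1}")
# idx 252: unload cached images 200..203
# idx 270: unload cached images 204..221
# idx 288: unload cached images 222..239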
@@ -8,7 +8,11 @@ import os

 class ImageTrainItem():
     """
-    # [image, identifier, target_aspect, closest_aspect_wh(w,h), pathname]
+    image: PIL.Image
+    identifier: caption,
+    target_aspect: (width, height),
+    pathname: path to image file
+    flip_p: probability of flipping image (0.0 to 1.0)
     """
     def __init__(self, image: PIL.Image, caption: str, target_wh: list, pathname: str, flip_p=0.0):
         self.caption = caption
@@ -18,49 +22,62 @@ class ImageTrainItem():
         self.cropped_img = None

         if image is None:
-            self.image = PIL.Image.new(mode='RGB',size=(1,1))
+            self.image = []
         else:
             self.image = image

     def hydrate(self, crop=False, save=False, crop_jitter=0):
-        self.image = PIL.Image.open(self.pathname).convert('RGB')
+        """
+        crop: hard center crop to 512x512
+        save: save the cropped image to disk, for manual inspection of resize/crop
+        crop_jitter: randomly shift crop by N pixels when using multiple aspect ratios to improve training quality
+        """
+        if not hasattr(self, 'image') or len(self.image) == 0:
+            self.image = PIL.Image.open(self.pathname).convert('RGB')

         width, height = self.image.size
         if crop:
             cropped_img = self.__autocrop(self.image)
             self.image = cropped_img.resize((512,512), resample=PIL.Image.BICUBIC)
         else:
-            if width == 512 and height == 512:
-                pass
-            elif self.target_wh[0] == self.target_wh[1]:
+            if self.target_wh[0] == self.target_wh[1]:
                 pass
             else:
                 width, height = self.image.size
                 image_aspect = width / height
-                jitter_amount = random.randint(-crop_jitter, crop_jitter)
-                jitter_amount = min(jitter_amount, int(abs(width-height)/2))
+                jitter_amount = random.randint(0, crop_jitter)
                 target_aspect = self.target_wh[0] / self.target_wh[1]
+                print(f"{target_aspect}, {self.target_wh}")
                 if image_aspect > target_aspect:
                     new_width = int(height * target_aspect)
-                    left = int((width - new_width) / 2) + jitter_amount
+                    jitter_amount = max(min(jitter_amount, int(abs(width-new_width)/2)), 0)
+                    left = jitter_amount
                     right = left + new_width
+                    print(f"crop left: {left}, right: {right}, jitteramt:{jitter_amount}, [{width}, {height}] img: {self.pathname}")
                     self.image = self.image.crop((left, 0, right, height))
                 else:
                     new_height = int(width / target_aspect)
-                    top = int((height - new_height) / 2) + jitter_amount
+                    jitter_amount = max(min(jitter_amount, int(abs(height-new_height)/2)), 0)
+                    top = jitter_amount
                     bottom = top + new_height
+                    print(f"crop top: {top}, bottom: {bottom}, jitteramt:{jitter_amount}, [{width}, {height}] img: {self.pathname}")
                     self.image = self.image.crop((0, top, width, bottom))
             self.image = self.image.resize(self.target_wh, resample=PIL.Image.BICUBIC)

         self.image = self.flip(self.image)

-        if save: # for manual inspection
-            base_name = os.path.basename(self.pathname)
-            self.image.save(f"test/output/{random.randint(0,4)}/{base_name}")
+        if type(self.image) is not np.ndarray:
+            if save:
+                base_name = os.path.basename(self.pathname)
+                if not os.path.exists("test/output"):
+                    os.makedirs("test/output")
+                self.image.save(f"test/output/{base_name}")

             self.image = np.array(self.image).astype(np.uint8)

             self.image = (self.image / 127.5 - 1.0).astype(np.float32)

+            print(self.image.shape)

         return self
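For a concrete feel of the reworked jitter math above (numbers invented for illustration): a 1024x768 image assigned to a [704,448] bucket has image_aspect ~1.33, below the target aspect ~1.57, so the crop comes out of the height; the jitter offset is clamped so the crop box never leaves the image, then the crop is resized to the bucket size. A standalone check of that branch:

# Standalone check of the non-square crop path (hypothetical image and bucket).
import random

width, height = 1024, 768
target_wh = [704, 448]
crop_jitter = 20

image_aspect = width / height                   # ~1.333
target_aspect = target_wh[0] / target_wh[1]     # ~1.571
jitter_amount = random.randint(0, crop_jitter)

# image is "taller" than the target aspect, so crop the height
new_height = int(width / target_aspect)         # 651
jitter_amount = max(min(jitter_amount, int(abs(height - new_height) / 2)), 0)  # clamped to at most 58
top, bottom = jitter_amount, jitter_amount + new_height
print((0, top, width, bottom))                  # crop box, then resize to (704, 448)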
@@ -96,3 +96,35 @@ class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2):
         self.last_f = f
         return f

+class EveryDreamScheduler:
+    """
+    f_min: minimum lr multiplier
+    f_max: maximum lr multiplier
+    f_start: lr multiplier at the beginning of the warm-up phase
+    warm_up_steps: number of steps in the warm-up phase
+    steps_to_min: number of steps to reach the minimum lr multiplier
+    """
+    def __init__(self, f_min=0.5, f_max=1.0, f_start=1.0, warm_up_steps=1000, steps_to_min=5000, verbosity_interval=100) -> None:
+        self.f_min = f_min
+        self.f_max = f_max
+        self.f_start = f_start
+        self.warm_up_steps = warm_up_steps
+        self.steps_to_min = steps_to_min
+        self.last_f = 0.
+        self.verbosity_interval = verbosity_interval
+
+    def __call__(self, n, **kwargs):
+        return self.schedule(n, **kwargs)
+
+    def schedule(self, n, **kwargs):
+        if self.verbosity_interval > 0:
+            if n % self.verbosity_interval == 0: print(f"current step: {n}, recent lr-multiplier: {self.last_f:0.3f}, current cycle: {0}")
+
+        if n < self.warm_up_steps:
+            self.last_f = self.f_start
+        elif n < self.steps_to_min:
+            self.last_f = self.f_min + (self.f_max - self.f_min) * (self.steps_to_min - n) / (self.steps_to_min)
+        else:
+            self.last_f = self.f_min
+
+        return self.last_f
@@ -453,6 +453,7 @@ class LatentDiffusion(DDPM):
                  conditioning_key=None,
                  scale_factor=1.0,
                  scale_by_std=False,
+                 scheduler_config=None,
                  *args, **kwargs):

         self.num_timesteps_cond = default(num_timesteps_cond, 1)
@@ -465,7 +466,7 @@ class LatentDiffusion(DDPM):
             conditioning_key = None
         ckpt_path = kwargs.pop("ckpt_path", None)
         ignore_keys = kwargs.pop("ignore_keys", [])
-        super().__init__(conditioning_key=conditioning_key, *args, **kwargs)
+        super().__init__(conditioning_key=conditioning_key, scheduler_config=scheduler_config, *args, **kwargs)
         self.concat_mode = concat_mode
         self.cond_stage_trainable = cond_stage_trainable
         self.cond_stage_key = cond_stage_key
@@ -704,8 +705,6 @@ class LatentDiffusion(DDPM):
         if cond_key != self.first_stage_key:
             if cond_key in ['caption', 'coordinates_bbox']:
                 xc = batch[cond_key]
-            elif cond_key == 'class_label':
-                xc = batch
             else:
                 xc = super().get_input(batch, cond_key).to(self.device)
         else:
@@ -0,0 +1,15 @@
import ldm.data.aspects as aspects

resolutions = [512, 576, 640, 704, 768]
oops = [532, 576, 640, 704, 768]

for res in resolutions:
    example_aspects = aspects.get_aspect_buckets(res)
    print(f" *{res} buckets: {example_aspects}")

    max_pixels = example_aspects[0][0] * example_aspects[0][1]

    for aspect in example_aspects:
        pixels = aspect[0] * aspect[1]
        print (f"max: {max_pixels}: {aspect}: {pixels}, pct {pixels/max_pixels:.2f}")
        assert pixels <= max_pixels, f" * {aspect} is larger than {max_pixels}"
@@ -0,0 +1,36 @@
# script to test data loader by itself
# run from training root, edit the data_root manually

from ldm.data.every_dream import EveryDreamBatch
import time

s = time.perf_counter()

#data_root = "r:/everydream-trainer/test/input"
data_root = "r:/everydream-trainer/training_samples"

batch_size = 6
repeats = 3
every_dream_batch = EveryDreamBatch(data_root=data_root, flip_p=0.0, debug_level=2, batch_size=batch_size, repeats=repeats, crop_jitter=25, conditional_dropout=0.3, resolution=512)

print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
print(f" max test cycles: {int(len(every_dream_batch) / batch_size)}, batch_size: {batch_size}, repeats: {repeats}")
i = 0

while i < 99: # and i < len(every_dream_batch):
    curr_batch = []
    for j in range(i,i+batch_size):
        curr_batch.append(every_dream_batch[j])

    # all in batch must have the same image size
    assert all(x == curr_batch[0]['image'].shape for x in [e['image'].shape for e in curr_batch])
    assert all(x[0] > 2 for x in [e['image'].shape for e in curr_batch])

    #print(f"idx: {i}, batch sample: shape: {curr_batch[0]['image'].shape}: {curr_batch[0]['caption']}")

    i += batch_size

print(f" *TEST* test cycles: {i}")
print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
elapsed = time.perf_counter() - s
print(f"{__file__} executed in {elapsed:5.2f} seconds.")
@@ -0,0 +1,48 @@
# script to see what cropping does to your images
# execute from root everydream-trainer folder
# ex.
# (everydream) R:\everydream-trainer>python scripts/test_crop.py
# dumps to /test/output

from ldm.data.every_dream import EveryDreamBatch
import time
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data_root', type=str, default=None, help='root path of all your training images, will be recursively searched for images')
parser.add_argument('--resolution', type=int, default=512, help='resolution class, 512, 576, 640, 704, or 768')
args = parser.parse_args()

s = time.perf_counter()

# put in your own data_root here, WARNING don't do this on a lot of images unless you are prepared for it...
if args.data_root is None:
    data_root = "R:/everydream-trainer/test/input"
else:
    data_root = args.data_root

debug_level = 3 # 3 = dump images to disk after cropping and a bunch of crap into the console be warned
batch_size = 1
repeats = 1
crop_jitter = 50
resolution = args.resolution # 512, 576, 640, 704, 768
every_dream_batch = EveryDreamBatch(data_root=data_root, flip_p=0.0, debug_level=3, batch_size=batch_size, repeats=repeats, crop_jitter=crop_jitter, conditional_dropout=0.1, resolution=resolution)

print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
print(f" max test cycles: {int(len(every_dream_batch) / batch_size)}, batch_size: {batch_size}, repeats: {repeats}")
i = 0

while i < len(every_dream_batch):
    curr_batch = []
    for j in range(i,i+batch_size):
        curr_batch.append(every_dream_batch[j])

    assert all(x == curr_batch[0]['image'].shape for x in [e['image'].shape for e in curr_batch])
    assert all(x[0] > 2 for x in [e['image'].shape for e in curr_batch])

    i += batch_size

print(f" *TEST* test cycles: {i}")
print(f" *TEST* EveryDreamBatch epoch image length: {len(every_dream_batch)}")
elapsed = time.perf_counter() - s
print(f"{__file__} executed in {elapsed:5.2f} seconds.")
@@ -0,0 +1,18 @@
# script to test data loader by itself
# run from training root, edit the data_root manually
# python ldm/data/test_dl.py
import ldm.data.data_loader as dl

data_root = "r:/everydream-trainer/test/input"

data_loader = dl.DataLoaderMultiAspect(data_root=data_root, batch_size=2, seed=555, debug_level=2)

image_caption_pairs = data_loader.get_all_images()

print(f"Loaded {len(image_caption_pairs)} image-caption pairs")

for image_caption_pair in image_caption_pairs:
    print(image_caption_pair)

print(f"**** Done loading. Loaded {len(image_caption_pairs)} images from data_root: {data_root} ****")
@@ -0,0 +1,24 @@
# script to test data loader by itself
# run from training root, edit the data_root manually
# python ldm/data/test_dl.py
from ldm.data.image_train_item import ImageTrainItem
import glob
import os

data_root = "training_samples\multiaspect"

for idx, f in enumerate(glob.iglob(f"{data_root}/*.jpg")):
    for i in range(0, 40):
        #print(f)
        #image: PIL.Image, caption: str, target_wh: list, pathname: str, flip_p=0.0):
        caption = os.path.basename(f)
        caption = os.path.splitext(caption)[0]
        my_iti = ImageTrainItem(None,caption,[512,512],f,0.0)

        my_iti = my_iti.hydrate()

        out_file_path = os.path.join(data_root, "output", f"{caption}_{i}.jpg")
        #print(out_file_path)
        my_iti.cropped_img.save(out_file_path)
@@ -0,0 +1,7 @@
import ldm.lr_scheduler as lrs

#def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths, verbosity_interval=0):
sch = lrs.EveryDreamScheduler(warm_up_steps=10, f_min=5.0e-1, f_max=1.0, f_start=1.0, steps_to_min=25, verbosity_interval=5)

for i in range(50):
    print(f"step {i}: {sch(i)}")