From 18d1da04593221bb5653e3bb5e6f3b6aa9150fad Mon Sep 17 00:00:00 2001 From: Victor Hall Date: Sun, 22 Jan 2023 18:59:59 -0500 Subject: [PATCH 1/5] bug fix and multiplytxt fraction stuff --- Train_Colab.ipynb | 12 +++++++++--- data/data_loader.py | 36 +++++++++++++++++++++++++++--------- data/every_dream.py | 3 +-- train.py | 22 ++++++++++++++++++++-- 4 files changed, 57 insertions(+), 16 deletions(-) diff --git a/Train_Colab.ipynb b/Train_Colab.ipynb index cd1e9b0..e1f5830 100644 --- a/Train_Colab.ipynb +++ b/Train_Colab.ipynb @@ -225,7 +225,7 @@ "#@title Resume from a diffusers model saved to your Gdrive\n", "#@markdown * if you have preveiously saved diffusers on your drive you can slect them here\n", "#@markdown ex. */content/drive/MyDrive/everydreamlogs/myproject_202208/ckpts/interrupted-gs023*\n", - "Resume_Model = \"\" #@param{type:\"string\"} \n", + "Resume_Model = \"/content/drive/MyDrive/everydreamlogs/ckpt/SD15\" #@param{type:\"string\"} \n", "save_name= Resume_Model" ] }, @@ -268,7 +268,8 @@ "#@markdown * Name your project so you can find it in your logs\n", "Project_Name = \"my_project\" #@param{type: 'string'}\n", "\n", - "#@markdown * The learning rate affects how much \"training\" is done on the model per training step. It is a very careful balance to select a value that will learn your data. See Advanced Tweaking for more info. Once you have started, the learning rate is a good first knob to turn as you move into more advanced tweaking.\n", + "#@markdown * The learning rate affects how much \"training\" is done on the model per training step. It is a very careful balance to select a value that will learn your data and not wreck the model. 
\n", + "#@markdown Leave this default unless you are very comfortable with training and know what you are doing.\n", "\n", "Learning_Rate = 1e-6 #@param{type: 'number'}\n", "\n", @@ -294,6 +295,8 @@ "#@markdown * Remember more gradient accumulation (or batch size) doesn't automatically mean better\n", "\n", "Gradient_steps = 1 #@param{type:\"slider\", min:1, max:10, step:1}\n", + "\n", + "#@markdown * Location on your Gdrive where your training images are.\n", "Dataset_Location = \"/content/drive/MyDrive/training_samples\" #@param {type:\"string\"}\n", "dataset = Dataset_Location\n", "model = save_name\n", @@ -309,8 +312,11 @@ "\n", "#@markdown You can set your own sample prompts by adding them, one line at a time, to `/content/EveryDream2trainer/sample_prompts.txt`. If left empty, it will use the captions from your training images.\n", "\n", + "#@markdown Use the steps_between_samples to set how often the samples are generated.\n", "Steps_between_samples = 300 #@param{type:\"integer\"}\n", "\n", + "#@markdown * That's it! 
Run the cell!\n", + "\n", "Drive=\"\"\n", "if Save_to_Gdrive:\n", " Drive = \"--logdir /content/drive/MyDrive/everydreamlogs --save_ckpt_dir /content/drive/MyDrive/everydreamlogs/ckpt\"\n", @@ -347,7 +353,7 @@ " $DX \\\n", " --amp \\\n", " --batch_size $Batch_Size \\\n", - " --grad_accum 2 \\\n", + " --grad_accum $Gradient_steps \\\n", " --cond_dropout 0.00 \\\n", " --data_root \"$dataset\" \\\n", " --flip_p 0.00 \\\n", diff --git a/data/data_loader.py b/data/data_loader.py index 60edce2..e7afd6d 100644 --- a/data/data_loader.py +++ b/data/data_loader.py @@ -17,6 +17,7 @@ import bisect import math import os import logging +import copy import yaml from PIL import Image @@ -58,6 +59,7 @@ class DataLoaderMultiAspect(): self.__recurse_data_root(self=self, recurse_root=data_root) random.Random(seed).shuffle(self.image_paths) self.prepared_train_data = self.__prescan_images(self.image_paths, flip_p) + print(f"DLMA Loaded {len(self.prepared_train_data)} images") (self.rating_overall_sum, self.ratings_summed) = self.__sort_and_precalc_image_ratings() @@ -65,32 +67,37 @@ class DataLoaderMultiAspect(): """ Deals with multiply.txt whole and fractional numbers """ - prepared_train_data_local = self.prepared_train_data.copy() + #print(f"Picking multiplied set from {len(self.prepared_train_data)}") + data_copy = copy.deepcopy(self.prepared_train_data) # deep copy to avoid modifying original multiplier property epoch_size = len(self.prepared_train_data) picked_images = [] # add by whole number part first and decrement multiplier in copy - for iti in prepared_train_data_local: + for iti in data_copy: + #print(f"check for whole number {iti.multiplier}: {iti.pathname}, remaining {iti.multiplier}") while iti.multiplier >= 1.0: picked_images.append(iti) - iti.multiplier -= 1 - if iti.multiplier == 0: - prepared_train_data_local.remove(iti) + #print(f"Adding {iti.multiplier}: {iti.pathname}, remaining {iti.multiplier}, , datalen: {len(picked_images)}") + iti.multiplier -= 1.0 remaining 
= epoch_size - len(picked_images) assert remaining >= 0, "Something went wrong with the multiplier calculation" + #print(f"Remaining to fill epoch after whole number adds: {remaining}") + #print(f"Remaining in data copy: {len(data_copy)}") # add by renaming fractional numbers by random chance while remaining > 0: - for iti in prepared_train_data_local: - if randomizer.uniform(0.0, 1) < iti.multiplier: + for iti in data_copy: + if randomizer.uniform(0.0, 1.0) < iti.multiplier: + #print(f"Adding {iti.multiplier}: {iti.pathname}, remaining {remaining}, datalen: {len(data_copy)}") picked_images.append(iti) remaining -= 1 - prepared_train_data_local.remove(iti) + data_copy.remove(iti) if remaining <= 0: break + del data_copy return picked_images def get_shuffled_image_buckets(self, dropout_fraction: float = 1.0): @@ -239,6 +246,7 @@ class DataLoaderMultiAspect(): multipliers = {} skip_folders = [] + randomizer = random.Random(self.seed) for pathname in tqdm.tqdm(image_paths): caption_from_filename = os.path.splitext(os.path.basename(pathname))[0].split("_")[0] @@ -293,7 +301,15 @@ class DataLoaderMultiAspect(): multiplier=multipliers[current_dir], ) - decorated_image_train_items.append(image_train_item) + cur_file_multiplier = multipliers[current_dir] + + while cur_file_multiplier >= 1.0: + decorated_image_train_items.append(image_train_item) + cur_file_multiplier -= 1 + + if cur_file_multiplier > 0: + if randomizer.random() < cur_file_multiplier: + decorated_image_train_items.append(image_train_item) except Exception as e: logging.error(f"{Fore.LIGHTRED_EX} *** Error opening {Fore.LIGHTYELLOW_EX}{pathname}{Fore.LIGHTRED_EX} to get metadata. 
File may be corrupt and will be skipped.{Style.RESET_ALL}") @@ -311,6 +327,8 @@ class DataLoaderMultiAspect(): for undersized_image in undersized_images: undersized_images_file.write(f"{undersized_image}\n") + print (f" * DLMA: {len(decorated_image_train_items)} images loaded from {len(image_paths)} files") + return decorated_image_train_items def __pick_random_subset(self, dropout_fraction: float, picker: random.Random) -> list[ImageTrainItem]: diff --git a/data/every_dream.py b/data/every_dream.py index 18f347f..aaaccd4 100644 --- a/data/every_dream.py +++ b/data/every_dream.py @@ -71,7 +71,6 @@ class EveryDreamBatch(Dataset): self.rated_dataset = rated_dataset self.rated_dataset_dropout_target = rated_dataset_dropout_target - if seed == -1: seed = random.randint(0, 99999) @@ -85,7 +84,7 @@ class EveryDreamBatch(Dataset): resolution=resolution, log_folder=self.log_folder, ) - + self.image_train_items = dls.shared_dataloader.get_shuffled_image_buckets(1.0) # First epoch always trains on all images num_images = len(self.image_train_items) diff --git a/train.py b/train.py index 75cd555..31807ad 100644 --- a/train.py +++ b/train.py @@ -367,7 +367,6 @@ def main(args): safety_checker=None, # save vram requires_safety_checker=None, # avoid nag feature_extractor=None, # must be none of no safety checker - disable_tqdm=True, ) return pipe @@ -410,6 +409,8 @@ def main(args): generates samples at different cfg scales and saves them to disk """ logging.info(f"Generating samples gs:{gs}, for {prompts}") + pipe.set_progress_bar_config(disable=True) + seed = args.seed if args.seed != -1 else random.randint(0, 2**30) gen = torch.Generator(device=device).manual_seed(seed) @@ -691,6 +692,24 @@ def main(args): assert len(train_batch) > 0, "train_batch is empty, check that your data_root is correct" try: + # # dummy batch to pin memory to avoid fragmentation in torch, uses square aspect which is maximum bytes size per aspects.py + # pixel_values = 
torch.randn_like(torch.zeros([args.batch_size, 3, args.resolution, args.resolution])) + # pixel_values = pixel_values.to(unet.device) + # with autocast(enabled=args.amp): + # latents = vae.encode(pixel_values, return_dict=False) + # latents = latents[0].sample() * 0.18215 + # noise = torch.randn_like(latents) + # bsz = latents.shape[0] + # timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device) + # timesteps = timesteps.long() + # noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps) + # cuda_caption = torch.linspace(100,177, steps=77, dtype=int).to(text_encoder.device) + # encoder_hidden_states = text_encoder(cuda_caption, output_hidden_states=True).last_hidden_state + # with autocast(enabled=args.amp): + # model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample + # # discard the grads, just want to pin memory + # optimizer.zero_grad(set_to_none=True) + for epoch in range(args.max_epochs): loss_epoch = [] epoch_start_time = time.time() @@ -801,7 +820,6 @@ def main(args): if (global_step + 1) % args.sample_steps == 0: pipe = __create_inference_pipe(unet=unet, text_encoder=text_encoder, tokenizer=tokenizer, scheduler=sample_scheduler, vae=vae) pipe = pipe.to(device) - #pipe.set_progress_bar_config(progress_bar=False) with torch.no_grad(): if sample_prompts is not None and len(sample_prompts) > 0 and len(sample_prompts[0]) > 1: From a11ff5ef246ca425b01b4c22ccad49d010499fae Mon Sep 17 00:00:00 2001 From: nawnie <106923464+nawnie@users.noreply.github.com> Date: Sun, 22 Jan 2023 18:13:31 -0600 Subject: [PATCH 2/5] Update Train_Colab.ipynb adds the chaining to colab, this was easier then erasing my fork --- Train_Colab.ipynb | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/Train_Colab.ipynb b/Train_Colab.ipynb index e1f5830..1c90129 100644 --- a/Train_Colab.ipynb +++ b/Train_Colab.ipynb @@ -382,8 +382,21 @@ "outputs": [], "source": [ "#@title Alternate 
startup script\n", - "#@markdown Edit train.json to setup your paramaters\n", - "!python train.py --config train.json" + "#@markdown * Edit train.json to setup your parameters\n", + "#@markdown * Edit chain0.json to make use of chaining\n", + "#@markdown * Make sure to check each configuration; you will need one JSON file per chain step (3 are provided)\n", + "\n", + "\n", + "%cd /content/EveryDream2trainer\n", + "Chain_Length=0 #@param{type:\"integer\"}\n", + "l = Chain_Length \n", + "I=0 #repeat counter\n", + "if l == None or l == 0:\n", + " l=1\n", + "while l > 0:\n", + " !python train_colab.py --config chain{I}.json\n", + " l -= 1\n", + " I += 1" ] } ], From 9f5f773c33e604119d38311800799cf2e0481fa6 Mon Sep 17 00:00:00 2001 From: Victor Hall Date: Sun, 22 Jan 2023 21:43:03 -0500 Subject: [PATCH 3/5] remove saving yaml for sd1x models, unneeded --- train.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/train.py b/train.py index 31807ad..25322d9 100644 --- a/train.py +++ b/train.py @@ -1,5 +1,5 @@ """ -Copyright [2022] Victor C Hall +Copyright [2022-2023] Victor C Hall Licensed under the GNU Affero General Public License; You may not use this code except in compliance with the License. 
@@ -343,7 +343,7 @@ def main(args): logging.info(f" * Saving SD model to {sd_ckpt_full}") converter(model_path=save_path, checkpoint_path=sd_ckpt_full, half=half) - if yaml_name: + if yaml_name and yaml_name != "v1-inference.yaml": yaml_save_path = f"{os.path.join(save_ckpt_dir, os.path.basename(save_path))}.yaml" logging.info(f" * Saving yaml to {yaml_save_path}") shutil.copyfile(yaml_name, yaml_save_path) @@ -589,7 +589,7 @@ def main(args): """ print(f" {Fore.LIGHTGREEN_EX}** Welcome to EveryDream trainer 2.0!**{Style.RESET_ALL}") - print(f" (C) 2022 Victor C Hall This program is licensed under AGPL 3.0 https://www.gnu.org/licenses/agpl-3.0.en.html") + print(f" (C) 2022-2023 Victor C Hall This program is licensed under AGPL 3.0 https://www.gnu.org/licenses/agpl-3.0.en.html") print() print("** Trainer Starting **") From 6630ea7a50caf86ad6321f2640b9b8a4628318aa Mon Sep 17 00:00:00 2001 From: Victor Hall Date: Mon, 23 Jan 2023 00:12:03 -0500 Subject: [PATCH 4/5] links for colab notebook --- Train_Colab.ipynb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Train_Colab.ipynb b/Train_Colab.ipynb index 1c90129..b666312 100644 --- a/Train_Colab.ipynb +++ b/Train_Colab.ipynb @@ -12,12 +12,17 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": { "id": "blaLMSbkPHhG" }, "source": [ - "EveryDream2 Colab Edition" + "# EveryDream2 Colab Edition\n", + "\n", + "Check out documentation here: https://github.com/victorchall/EveryDream2trainer#docs\n", + "\n", + "And join the discord: https://discord.gg/uheqxU6sXN" ] }, { From 993eabf99ad7cd72963341e05d6d0ad67df37ac7 Mon Sep 17 00:00:00 2001 From: Joel Holdbrooks Date: Sun, 22 Jan 2023 16:08:50 -0800 Subject: [PATCH 5/5] Add static methods on ImageCaption for deriving captions from various sources --- data/image_train_item.py | 152 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 146 insertions(+), 6 deletions(-) diff --git a/data/image_train_item.py b/data/image_train_item.py index 
08bf736..9d1bdcb 100644 --- a/data/image_train_item.py +++ b/data/image_train_item.py @@ -18,6 +18,8 @@ import logging import math import os import random +import typing +import yaml import PIL import numpy as np @@ -25,6 +27,9 @@ from torchvision import transforms _RANDOM_TRIM = 0.04 +DEFAULT_MAX_CAPTION_LENGTH = 2048 + +OptionalImageCaption = typing.Optional['ImageCaption'] class ImageCaption: """ @@ -60,13 +65,15 @@ class ImageCaption: :param seed used to initialize the randomizer :return: generated caption string """ - max_target_tag_length = self.__max_target_length - len(self.__main_prompt) + if self.__tags: + max_target_tag_length = self.__max_target_length - len(self.__main_prompt) - if self.__use_weights: - tags_caption = self.__get_weighted_shuffled_tags(seed, self.__tags, self.__tag_weights, max_target_tag_length) - else: - tags_caption = self.__get_shuffled_tags(seed, self.__tags) + if self.__use_weights: + tags_caption = self.__get_weighted_shuffled_tags(seed, self.__tags, self.__tag_weights, max_target_tag_length) + else: + tags_caption = self.__get_shuffled_tags(seed, self.__tags) + return self.__main_prompt + ", " + tags_caption - return self.__main_prompt + ", " + tags_caption + return self.__main_prompt def get_caption(self) -> str: @@ -91,7 +98,10 @@ class ImageCaption: weights_copy.pop(pos) tag = tags_copy.pop(pos) - caption += ", " + tag + + if caption: + caption += ", " + caption += tag return caption @@ -100,6 +110,136 @@ class ImageCaption: random.Random(seed).shuffle(tags) return ", ".join(tags) + @staticmethod + def parse(string: str) -> 'ImageCaption': + """ + Parses a string to get the caption. + + :param string: String to parse. + :return: `ImageCaption` object. 
+ """ + split_caption = list(map(str.strip, string.split(","))) + main_prompt = split_caption[0] + tags = split_caption[1:] + tag_weights = [1.0] * len(tags) + + return ImageCaption(main_prompt, 1.0, tags, tag_weights, DEFAULT_MAX_CAPTION_LENGTH, False) + + @staticmethod + def from_file_name(file_path: str) -> 'ImageCaption': + """ + Parses the file name to get the caption. + + :param file_path: Path to the image file. + :return: `ImageCaption` object. + """ + (file_name, _) = os.path.splitext(os.path.basename(file_path)) + caption = file_name.split("_")[0] + return ImageCaption(caption, 1.0, [], [], DEFAULT_MAX_CAPTION_LENGTH, False) + + @staticmethod + def from_text_file(file_path: str, default_caption: OptionalImageCaption=None) -> OptionalImageCaption: + """ + Parses a text file to get the caption. Returns the default caption if + the file does not exist or is invalid. + + :param file_path: Path to the text file. + :param default_caption: Optional `ImageCaption` to return if the file does not exist or is invalid. + :return: `ImageCaption` object or `None`. + """ + try: + with open(file_path, encoding='utf-8', mode='r') as caption_file: + caption_text = caption_file.read() + return ImageCaption.parse(caption_text) + except Exception: + logging.error(f" *** Error reading {file_path} to get caption") + return default_caption + + @staticmethod + def from_yaml_file(file_path: str, default_caption: OptionalImageCaption=None) -> OptionalImageCaption: + """ + Parses a yaml file to get the caption. Returns the default caption if + the file does not exist or is invalid. + + :param file_path: path to the yaml file + :param default_caption: caption to return if the file does not exist or is invalid + :return: `ImageCaption` object or `None`. 
+ """ + try: + with open(file_path, encoding='utf-8', mode='r') as stream: + file_content = yaml.safe_load(stream) + main_prompt = file_content.get("main_prompt", "") + rating = file_content.get("rating", 1.0) + unparsed_tags = file_content.get("tags", []) + + max_caption_length = file_content.get("max_caption_length", DEFAULT_MAX_CAPTION_LENGTH) + + tags = [] + tag_weights = [] + last_weight = None + weights_differ = False + for unparsed_tag in unparsed_tags: + tag = unparsed_tag.get("tag", "").strip() + if len(tag) == 0: + continue + + tags.append(tag) + tag_weight = unparsed_tag.get("weight", 1.0) + tag_weights.append(tag_weight) + + if last_weight is not None and weights_differ is False: + weights_differ = last_weight != tag_weight + + last_weight = tag_weight + + return ImageCaption(main_prompt, rating, tags, tag_weights, max_caption_length, weights_differ) + except Exception: + logging.error(f" *** Error reading {file_path} to get caption") + return default_caption + + @staticmethod + def from_file(file_path: str, default_caption: OptionalImageCaption=None) -> OptionalImageCaption: + """ + Try to resolve a caption from a file path or return `default_caption`. + + :param file_path: The path to the file to parse. + :param default_caption: Optional `ImageCaption` to return if the file does not exist or is invalid. + :return: `ImageCaption` object or `None`. 
+ """ + if os.path.exists(file_path): + (file_path_without_ext, ext) = os.path.splitext(file_path) + match ext: + case ".yaml" | ".yml": + return ImageCaption.from_yaml_file(file_path, default_caption) + + case ".txt" | ".caption": + return ImageCaption.from_text_file(file_path, default_caption) + + case '.jpg'| '.jpeg'| '.png'| '.bmp'| '.webp'| '.jfif': + for ext in [".yaml", ".yml", ".txt", ".caption"]: + file_path = file_path_without_ext + ext + image_caption = ImageCaption.from_file(file_path) + if image_caption is not None: + return image_caption + return ImageCaption.from_file_name(file_path) + + case _: + return default_caption + else: + return default_caption + + @staticmethod + def resolve(string: str) -> 'ImageCaption': + """ + Try to resolve a caption from a string. If the string is a file path, + the caption will be read from the file, otherwise the string will be + parsed as a caption. + + :string: The string to resolve. + :return: `ImageCaption` object. + """ + return ImageCaption.from_file(string, None) or ImageCaption.parse(string) + class ImageTrainItem: """