From 18d1da04593221bb5653e3bb5e6f3b6aa9150fad Mon Sep 17 00:00:00 2001
From: Victor Hall <victor.charles.hall@gmail.com>
Date: Sun, 22 Jan 2023 18:59:59 -0500
Subject: [PATCH 1/5] bug fix and multiplytxt fraction stuff

---
 Train_Colab.ipynb   | 12 +++++++++---
 data/data_loader.py | 36 +++++++++++++++++++++++++++---------
 data/every_dream.py |  3 +--
 train.py            | 22 ++++++++++++++++++++--
 4 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/Train_Colab.ipynb b/Train_Colab.ipynb
index cd1e9b0..e1f5830 100644
--- a/Train_Colab.ipynb
+++ b/Train_Colab.ipynb
@@ -225,7 +225,7 @@
         "#@title Resume from a diffusers model saved to your Gdrive\n",
         "#@markdown * if you have preveiously saved diffusers on your drive you can slect them here\n",
         "#@markdown ex. */content/drive/MyDrive/everydreamlogs/myproject_202208/ckpts/interrupted-gs023*\n",
-        "Resume_Model = \"\" #@param{type:\"string\"} \n",
+        "Resume_Model = \"/content/drive/MyDrive/everydreamlogs/ckpt/SD15\" #@param{type:\"string\"} \n",
         "save_name= Resume_Model"
       ]
     },
@@ -268,7 +268,8 @@
         "#@markdown * Name your project so you can find it in your logs\n",
         "Project_Name = \"my_project\" #@param{type: 'string'}\n",
         "\n",
-        "#@markdown * The learning rate affects how much \"training\" is done on the model per training step. It is a very careful balance to select a value that will learn your data. See Advanced Tweaking for more info. Once you have started, the learning rate is a good first knob to turn as you move into more advanced tweaking.\n",
+        "#@markdown * The learning rate affects how much \"training\" is done on the model per training step. It is a very careful balance to select a value that will learn your data and not wreck the model. \n",
+        "#@markdown Leave this default unless you are very comfortable with training and know what you are doing.\n",
         "\n",
         "Learning_Rate = 1e-6 #@param{type: 'number'}\n",
         "\n",
@@ -294,6 +295,8 @@
         "#@markdown * Remember more gradient accumulation (or batch size) doesn't automatically mean better\n",
         "\n",
         "Gradient_steps = 1 #@param{type:\"slider\", min:1, max:10, step:1}\n",
+        "\n",
+        "#@markdown * Location on your Gdrive where your training images are.\n",
         "Dataset_Location = \"/content/drive/MyDrive/training_samples\" #@param {type:\"string\"}\n",
         "dataset = Dataset_Location\n",
         "model = save_name\n",
@@ -309,8 +312,11 @@
         "\n",
         "#@markdown You can set your own sample prompts by adding them, one line at a time, to `/content/EveryDream2trainer/sample_prompts.txt`.  If left empty, it will use the captions from your training images.\n",
         "\n",
+        "#@markdown Use the steps_between_samples to set how often the samples are generated.\n",
         "Steps_between_samples = 300 #@param{type:\"integer\"}\n",
         "\n",
+        "#@markdown * That's it!  Run the cell!\n",
+        "\n",
         "Drive=\"\"\n",
         "if Save_to_Gdrive:\n",
         "  Drive = \"--logdir /content/drive/MyDrive/everydreamlogs --save_ckpt_dir /content/drive/MyDrive/everydreamlogs/ckpt\"\n",
@@ -347,7 +353,7 @@
         "  $DX \\\n",
         "  --amp \\\n",
         "  --batch_size $Batch_Size \\\n",
-        "  --grad_accum 2 \\\n",
+        "  --grad_accum $Gradient_steps \\\n",
         "  --cond_dropout 0.00 \\\n",
         "  --data_root \"$dataset\" \\\n",
         "  --flip_p 0.00 \\\n",
diff --git a/data/data_loader.py b/data/data_loader.py
index 60edce2..e7afd6d 100644
--- a/data/data_loader.py
+++ b/data/data_loader.py
@@ -17,6 +17,7 @@ import bisect
 import math
 import os
 import logging
+import copy
 
 import yaml
 from PIL import Image
@@ -58,6 +59,7 @@ class DataLoaderMultiAspect():
         self.__recurse_data_root(self=self, recurse_root=data_root)
         random.Random(seed).shuffle(self.image_paths)
         self.prepared_train_data = self.__prescan_images(self.image_paths, flip_p)
+        print(f"DLMA Loaded {len(self.prepared_train_data)} images")
         (self.rating_overall_sum, self.ratings_summed) = self.__sort_and_precalc_image_ratings()
 
 
@@ -65,32 +67,37 @@ class DataLoaderMultiAspect():
         """
         Deals with multiply.txt whole and fractional numbers
         """
-        prepared_train_data_local = self.prepared_train_data.copy()
+        #print(f"Picking multiplied set from {len(self.prepared_train_data)}")
+        data_copy = copy.deepcopy(self.prepared_train_data) # deep copy to avoid modifying original multiplier property
         epoch_size = len(self.prepared_train_data)
         picked_images = []
 
         # add by whole number part first and decrement multiplier in copy
-        for iti in prepared_train_data_local:
+        for iti in data_copy:
+            #print(f"check for whole number {iti.multiplier}: {iti.pathname}, remaining {iti.multiplier}")
             while iti.multiplier >= 1.0:
                 picked_images.append(iti)
-                iti.multiplier -= 1
-            if iti.multiplier == 0:
-                prepared_train_data_local.remove(iti)
+                #print(f"Adding {iti.multiplier}: {iti.pathname}, remaining {iti.multiplier}, , datalen: {len(picked_images)}")
+                iti.multiplier -= 1.0
 
         remaining = epoch_size - len(picked_images)
 
         assert remaining >= 0, "Something went wrong with the multiplier calculation"
+        #print(f"Remaining to fill epoch after whole number adds: {remaining}")
+        #print(f"Remaining in data copy: {len(data_copy)}")
 
         # add by renaming fractional numbers by random chance
         while remaining > 0:
-            for iti in prepared_train_data_local:
-                if randomizer.uniform(0.0, 1) < iti.multiplier:
+            for iti in data_copy:
+                if randomizer.uniform(0.0, 1.0) < iti.multiplier:
+                    #print(f"Adding {iti.multiplier}: {iti.pathname}, remaining {remaining}, datalen: {len(data_copy)}")
                     picked_images.append(iti)
                     remaining -= 1
-                    prepared_train_data_local.remove(iti)
+                    data_copy.remove(iti)
                 if remaining <= 0:
                     break
         
+        del data_copy
         return picked_images
 
     def get_shuffled_image_buckets(self, dropout_fraction: float = 1.0):
@@ -239,6 +246,7 @@ class DataLoaderMultiAspect():
 
         multipliers = {}
         skip_folders = []
+        randomizer = random.Random(self.seed)
 
         for pathname in tqdm.tqdm(image_paths):
             caption_from_filename = os.path.splitext(os.path.basename(pathname))[0].split("_")[0]
@@ -293,7 +301,15 @@ class DataLoaderMultiAspect():
                                                   multiplier=multipliers[current_dir],
                                                  )
 
-                decorated_image_train_items.append(image_train_item)
+                cur_file_multiplier = multipliers[current_dir]
+
+                while cur_file_multiplier >= 1.0:
+                    decorated_image_train_items.append(image_train_item)
+                    cur_file_multiplier -= 1
+                
+                if cur_file_multiplier > 0:
+                    if randomizer.random() < cur_file_multiplier:
+                        decorated_image_train_items.append(image_train_item)
                 
             except Exception as e:
                 logging.error(f"{Fore.LIGHTRED_EX} *** Error opening {Fore.LIGHTYELLOW_EX}{pathname}{Fore.LIGHTRED_EX} to get metadata. File may be corrupt and will be skipped.{Style.RESET_ALL}")
@@ -311,6 +327,8 @@ class DataLoaderMultiAspect():
                     for undersized_image in undersized_images:
                         undersized_images_file.write(f"{undersized_image}\n")
         
+        print (f" * DLMA: {len(decorated_image_train_items)} images loaded from {len(image_paths)} files")
+
         return decorated_image_train_items
 
     def __pick_random_subset(self, dropout_fraction: float, picker: random.Random) -> list[ImageTrainItem]:
diff --git a/data/every_dream.py b/data/every_dream.py
index 18f347f..aaaccd4 100644
--- a/data/every_dream.py
+++ b/data/every_dream.py
@@ -71,7 +71,6 @@ class EveryDreamBatch(Dataset):
         self.rated_dataset = rated_dataset
         self.rated_dataset_dropout_target = rated_dataset_dropout_target
 
-
         if seed == -1:
             seed = random.randint(0, 99999)
         
@@ -85,7 +84,7 @@ class EveryDreamBatch(Dataset):
                                          resolution=resolution,
                                          log_folder=self.log_folder,
                                         )
-        
+
         self.image_train_items = dls.shared_dataloader.get_shuffled_image_buckets(1.0) # First epoch always trains on all images
 
         num_images = len(self.image_train_items)
diff --git a/train.py b/train.py
index 75cd555..31807ad 100644
--- a/train.py
+++ b/train.py
@@ -367,7 +367,6 @@ def main(args):
             safety_checker=None, # save vram
             requires_safety_checker=None, # avoid nag
             feature_extractor=None, # must be none of no safety checker
-            disable_tqdm=True,
         )
 
         return pipe
@@ -410,6 +409,8 @@ def main(args):
         generates samples at different cfg scales and saves them to disk
         """
         logging.info(f"Generating samples gs:{gs}, for {prompts}")
+        pipe.set_progress_bar_config(disable=True)
+
         seed = args.seed if args.seed != -1 else random.randint(0, 2**30)
         gen = torch.Generator(device=device).manual_seed(seed)
 
@@ -691,6 +692,24 @@ def main(args):
     assert len(train_batch) > 0, "train_batch is empty, check that your data_root is correct"
     
     try:
+        # # dummy batch to pin memory to avoid fragmentation in torch, uses square aspect which is maximum bytes size per aspects.py
+        # pixel_values = torch.randn_like(torch.zeros([args.batch_size, 3, args.resolution, args.resolution]))
+        # pixel_values = pixel_values.to(unet.device)
+        # with autocast(enabled=args.amp):
+        #     latents = vae.encode(pixel_values, return_dict=False)
+        # latents = latents[0].sample() * 0.18215
+        # noise = torch.randn_like(latents)
+        # bsz = latents.shape[0]
+        # timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device)
+        # timesteps = timesteps.long()
+        # noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
+        # cuda_caption = torch.linspace(100,177, steps=77, dtype=int).to(text_encoder.device)
+        # encoder_hidden_states = text_encoder(cuda_caption, output_hidden_states=True).last_hidden_state
+        # with autocast(enabled=args.amp):
+        #     model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample
+        # # discard the grads, just want to pin memory
+        # optimizer.zero_grad(set_to_none=True)
+
         for epoch in range(args.max_epochs):
             loss_epoch = []
             epoch_start_time = time.time()
@@ -801,7 +820,6 @@ def main(args):
                 if (global_step + 1) % args.sample_steps == 0:
                     pipe = __create_inference_pipe(unet=unet, text_encoder=text_encoder, tokenizer=tokenizer, scheduler=sample_scheduler, vae=vae)
                     pipe = pipe.to(device)
-                    #pipe.set_progress_bar_config(progress_bar=False)
 
                     with torch.no_grad():
                         if sample_prompts is not None and len(sample_prompts) > 0 and len(sample_prompts[0]) > 1:

From a11ff5ef246ca425b01b4c22ccad49d010499fae Mon Sep 17 00:00:00 2001
From: nawnie <106923464+nawnie@users.noreply.github.com>
Date: Sun, 22 Jan 2023 18:13:31 -0600
Subject: [PATCH 2/5] Update Train_Colab.ipynb

Adds chaining to the Colab notebook; this was easier than erasing my fork.
---
 Train_Colab.ipynb | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/Train_Colab.ipynb b/Train_Colab.ipynb
index e1f5830..1c90129 100644
--- a/Train_Colab.ipynb
+++ b/Train_Colab.ipynb
@@ -382,8 +382,21 @@
       "outputs": [],
       "source": [
         "#@title Alternate startup script\n",
-        "#@markdown Edit train.json to setup your paramaters\n",
-        "!python train.py --config train.json"
+        "#@markdown * Edit train.json to set up your parameters\n",
+        "#@markdown * Edit chain0.json to make use of chaining\n",
+        "#@markdown * Make sure to check each configuration: you will need one JSON file per chain step (3 are provided)\n",
+        "\n",
+        "\n",
+        "%cd /content/EveryDream2trainer\n",
+        "Chain_Length=0 #@param{type:\"integer\"}\n",
+        "l = Chain_Length # number of training runs still to launch\n",
+        "I=0 # index of the chain config to run next (chain0.json, chain1.json, ...)\n",
+        "if l is None or l == 0:\n",
+        "  l=1 # always run at least chain0.json\n",
+        "while l > 0:\n",
+        "  !python train_colab.py --config chain{I}.json\n",
+        "  l -= 1\n",
+        "  I += 1"
       ]
     }
   ],

From 9f5f773c33e604119d38311800799cf2e0481fa6 Mon Sep 17 00:00:00 2001
From: Victor Hall <victor.charles.hall@gmail.com>
Date: Sun, 22 Jan 2023 21:43:03 -0500
Subject: [PATCH 3/5] remove saving yaml for sd1x models, unneeded

---
 train.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/train.py b/train.py
index 31807ad..25322d9 100644
--- a/train.py
+++ b/train.py
@@ -1,5 +1,5 @@
 """
-Copyright [2022] Victor C Hall
+Copyright [2022-2023] Victor C Hall
 
 Licensed under the GNU Affero General Public License;
 You may not use this code except in compliance with the License.
@@ -343,7 +343,7 @@ def main(args):
         logging.info(f" * Saving SD model to {sd_ckpt_full}")
         converter(model_path=save_path, checkpoint_path=sd_ckpt_full, half=half)
 
-        if yaml_name:
+        if yaml_name and yaml_name != "v1-inference.yaml":
             yaml_save_path = f"{os.path.join(save_ckpt_dir, os.path.basename(save_path))}.yaml"
             logging.info(f" * Saving yaml to {yaml_save_path}")
             shutil.copyfile(yaml_name, yaml_save_path)
@@ -589,7 +589,7 @@ def main(args):
 
     """
     print(f" {Fore.LIGHTGREEN_EX}** Welcome to EveryDream trainer 2.0!**{Style.RESET_ALL}")
-    print(f" (C) 2022 Victor C Hall  This program is licensed under AGPL 3.0 https://www.gnu.org/licenses/agpl-3.0.en.html")
+    print(f" (C) 2022-2023 Victor C Hall  This program is licensed under AGPL 3.0 https://www.gnu.org/licenses/agpl-3.0.en.html")
     print()
     print("** Trainer Starting **")
 

From 6630ea7a50caf86ad6321f2640b9b8a4628318aa Mon Sep 17 00:00:00 2001
From: Victor Hall <victor.charles.hall@gmail.com>
Date: Mon, 23 Jan 2023 00:12:03 -0500
Subject: [PATCH 4/5] links for colab notebook

---
 Train_Colab.ipynb | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Train_Colab.ipynb b/Train_Colab.ipynb
index 1c90129..b666312 100644
--- a/Train_Colab.ipynb
+++ b/Train_Colab.ipynb
@@ -12,12 +12,17 @@
       ]
     },
     {
+      "attachments": {},
       "cell_type": "markdown",
       "metadata": {
         "id": "blaLMSbkPHhG"
       },
       "source": [
-        "EveryDream2 Colab Edition"
+        "# EveryDream2 Colab Edition\n",
+        "\n",
+        "Check out documentation here: https://github.com/victorchall/EveryDream2trainer#docs\n",
+        "\n",
+        "And join the discord: https://discord.gg/uheqxU6sXN"
       ]
     },
     {

From 993eabf99ad7cd72963341e05d6d0ad67df37ac7 Mon Sep 17 00:00:00 2001
From: Joel Holdbrooks <cjholdbrooks@gmail.com>
Date: Sun, 22 Jan 2023 16:08:50 -0800
Subject: [PATCH 5/5] Add static methods on ImageCaption for deriving captions
 from various sources

---
 data/image_train_item.py | 152 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 146 insertions(+), 6 deletions(-)

diff --git a/data/image_train_item.py b/data/image_train_item.py
index 08bf736..9d1bdcb 100644
--- a/data/image_train_item.py
+++ b/data/image_train_item.py
@@ -18,6 +18,8 @@ import logging
 import math
 import os
 import random
+import typing
+import yaml
 
 import PIL
 import numpy as np
@@ -25,6 +27,9 @@ from torchvision import transforms
 
 _RANDOM_TRIM = 0.04
 
+DEFAULT_MAX_CAPTION_LENGTH = 2048
+
+OptionalImageCaption = typing.Optional['ImageCaption']
 
 class ImageCaption:
     """
@@ -60,13 +65,15 @@ class ImageCaption:
         :param seed used to initialize the randomizer
         :return: generated caption string
         """
-        max_target_tag_length = self.__max_target_length - len(self.__main_prompt)
+        if self.__tags:
+            max_target_tag_length = self.__max_target_length - len(self.__main_prompt)
 
-        if self.__use_weights:
-            tags_caption = self.__get_weighted_shuffled_tags(seed, self.__tags, self.__tag_weights, max_target_tag_length)
-        else:
-            tags_caption = self.__get_shuffled_tags(seed, self.__tags)
+            if self.__use_weights:
+                tags_caption = self.__get_weighted_shuffled_tags(seed, self.__tags, self.__tag_weights, max_target_tag_length)
+            else:
+                tags_caption = self.__get_shuffled_tags(seed, self.__tags)
 
+            return self.__main_prompt + ", " + tags_caption
         return self.__main_prompt + ", " + tags_caption
 
     def get_caption(self) -> str:
@@ -91,7 +98,10 @@ class ImageCaption:
 
             weights_copy.pop(pos)
             tag = tags_copy.pop(pos)
-            caption += ", " + tag
+            
+            if caption:
+                caption += ", "
+            caption += tag
 
         return caption
 
@@ -100,6 +110,136 @@ class ImageCaption:
         random.Random(seed).shuffle(tags)
         return ", ".join(tags)
 
+    @staticmethod
+    def parse(string: str) -> 'ImageCaption':
+        """
+        Parses a comma-separated string: the first item is the main prompt, the remaining items are tags.
+
+        :param string: String to parse.
+        :return: `ImageCaption` object; every tag gets a weight of 1.0.
+        """
+        split_caption = list(map(str.strip, string.split(",")))
+        main_prompt = split_caption[0]
+        tags = [tag for tag in split_caption[1:] if tag]  # skip empty tags (e.g. from a trailing comma)
+        tag_weights = [1.0] * len(tags)
+
+        return ImageCaption(main_prompt, 1.0, tags, tag_weights, DEFAULT_MAX_CAPTION_LENGTH, False)
+    
+    @staticmethod
+    def from_file_name(file_path: str) -> 'ImageCaption':
+        """
+        Derives the caption from the file name: extension dropped, text before the first "_" kept.
+        
+        :param file_path: Path to the image file.
+        :return: `ImageCaption` object.
+        """
+        (file_name, _) = os.path.splitext(os.path.basename(file_path))
+        caption = file_name.split("_")[0]
+        return ImageCaption(caption, 1.0, [], [], DEFAULT_MAX_CAPTION_LENGTH, False)
+    
+    @staticmethod
+    def from_text_file(file_path: str, default_caption: OptionalImageCaption=None) -> OptionalImageCaption:
+        """
+        Parses a text file to get the caption. Returns the default caption if
+        the file does not exist or is invalid.
+        
+        :param file_path: Path to the text file.
+        :param default_caption: Optional `ImageCaption` to return if the file does not exist or is invalid.
+        :return: `ImageCaption` object or `None`.
+        """
+        try:
+            with open(file_path, encoding='utf-8', mode='r') as caption_file:
+                caption_text = caption_file.read()
+                return ImageCaption.parse(caption_text)
+        except Exception:  # best-effort read, but let KeyboardInterrupt/SystemExit propagate
+            logging.error(f" *** Error reading {file_path} to get caption")
+            return default_caption
+        
+    @staticmethod
+    def from_yaml_file(file_path: str, default_caption: OptionalImageCaption=None) -> OptionalImageCaption:
+        """
+        Parses a yaml file (keys: main_prompt, rating, tags, max_caption_length) to get the caption.
+        Returns the default caption if the file does not exist or is invalid.
+        
+        :param file_path: path to the yaml file
+        :param default_caption: caption to return if the file does not exist or is invalid
+        :return: `ImageCaption` object or `None`.
+        """
+        try:
+            with open(file_path, encoding='utf-8', mode='r') as stream:  # explicit encoding, same as from_text_file
+                file_content = yaml.safe_load(stream)
+                main_prompt = file_content.get("main_prompt", "")
+                rating = file_content.get("rating", 1.0)
+                unparsed_tags = file_content.get("tags", [])
+
+                max_caption_length = file_content.get("max_caption_length", DEFAULT_MAX_CAPTION_LENGTH)
+
+                tags = []
+                tag_weights = []
+                last_weight = None
+                weights_differ = False  # becomes True once two kept tags have different weights
+                for unparsed_tag in unparsed_tags:
+                    tag = unparsed_tag.get("tag", "").strip()
+                    if len(tag) == 0:  # ignore entries without a usable tag name
+                        continue
+
+                    tags.append(tag)
+                    tag_weight = unparsed_tag.get("weight", 1.0)
+                    tag_weights.append(tag_weight)
+
+                    if last_weight is not None and weights_differ is False:
+                        weights_differ = last_weight != tag_weight
+
+                    last_weight = tag_weight
+
+                return ImageCaption(main_prompt, rating, tags, tag_weights, max_caption_length, weights_differ)
+        except Exception:  # best-effort read, but let KeyboardInterrupt/SystemExit propagate
+            logging.error(f" *** Error reading {file_path} to get caption")
+            return default_caption
+        
+    @staticmethod
+    def from_file(file_path: str, default_caption: OptionalImageCaption=None) -> OptionalImageCaption:
+        """
+        Try to resolve a caption from a file path or return `default_caption`.
+
+        :param file_path: The path to the file to parse (a caption file or an image).
+        :param default_caption: Optional `ImageCaption` to return if the file does not exist or is invalid (images fall back to the file name instead).
+        :return: `ImageCaption` object or `None`.
+        """
+        if os.path.exists(file_path):
+            (file_path_without_ext, ext) = os.path.splitext(file_path)
+            match ext.lower():  # also accept upper-case extensions such as ".JPG"
+                case ".yaml" | ".yml":
+                    return ImageCaption.from_yaml_file(file_path, default_caption)
+
+                case ".txt" | ".caption":
+                    return ImageCaption.from_text_file(file_path, default_caption)
+
+                case '.jpg'| '.jpeg'| '.png'| '.bmp'| '.webp'| '.jfif':
+                    for sidecar_ext in [".yaml", ".yml", ".txt", ".caption"]:  # sidecar caption files, tried in this order
+                        sidecar_path = file_path_without_ext + sidecar_ext
+                        image_caption = ImageCaption.from_file(sidecar_path)
+                        if image_caption is not None:
+                            return image_caption
+                    return ImageCaption.from_file_name(file_path)  # no usable sidecar: fall back to the image file name
+
+                case _:
+                    return default_caption
+        else:
+            return default_caption
+        
+    @staticmethod
+    def resolve(string: str) -> 'ImageCaption':
+        """
+        Try to resolve a caption from a string. If the string is a file path,
+        the caption will be read from the file, otherwise the string will be
+        parsed as a caption.
+
+        :param string: The string to resolve.
+        :return: `ImageCaption` object.
+        """
+        return ImageCaption.from_file(string, None) or ImageCaption.parse(string)
+
 
 class ImageTrainItem:
     """