From 120d406355db362afe9777922000639d43f9d973 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Fri, 11 Nov 2022 17:14:46 -0700
Subject: [PATCH 01/14] Implementation of validation/resize classes.

---
 trainer/diffusers_trainer.py | 165 ++++++++++++++++++++++++++++++-----
 1 file changed, 143 insertions(+), 22 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index 75a5afe..7cad170 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -26,6 +26,7 @@ import numpy as np
 import json
 import re
 import traceback
+import shutil
 
 try:
     pynvml.nvmlInit()
@@ -38,6 +39,7 @@ from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
 from diffusers.optimization import get_scheduler
 from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 from PIL import Image, ImageOps
+from PIL.Image import Image as Img
 
 from typing import Dict, List, Generator, Tuple
 from scipy.interpolate import interp1d
@@ -83,6 +85,10 @@ parser.add_argument('--clip_penultimate', type=str, default='False', help='Use p
 parser.add_argument('--output_bucket_info', type=str, default='False', help='Outputs bucket information and exits')
 parser.add_argument('--resize', type=str, default='False', help="Resizes dataset's images to the appropriate bucket dimensions.")
 parser.add_argument('--use_xformers', type=str, default='False', help='Use memory efficient attention')
+parser.add_argument('--extended_validation', type=str, default='False', help='Perform extended validation of images to catch truncated or corrupt images.')
+parser.add_argument('--data_migration', type=str, default='True', help='Perform migration of resized images into a directory relative to the dataset path. Saves into `<dataset_directory_name>_cropped`.')
+parser.add_argument('--skip_validation', type=str, default='False', help='Skip validation of images, useful for speeding up loading of very large datasets that have already been validated.')
+
 args = parser.parse_args()
 
 for arg in vars(args):
@@ -149,39 +155,153 @@ def _sort_by_ratio(bucket: tuple) -> float:
 def _sort_by_area(bucket: tuple) -> float:
     return bucket[0] * bucket[1]
 
+class Validation():
+    def __init__(self, is_skipped: bool, is_extended: bool) -> None:
+        if is_skipped:
+            self.validate = self.__no_op
+            print("Validation: Skipped")
+            return 
+
+        if is_extended:
+            self.validate = self.__extended_validate
+            return print("Validation: Extended")
+
+        self.validate = self.__validate
+        print("Validation: Standard")
+
+    def completed(self) -> None:
+        self.validate = self.__no_op
+        return print('Validation complete. Skipping further validation.')
+
+    def __validate(self, fp: str) -> bool:
+        try:
+            Image.open(fp)
+            return True  # only proves the file can be opened/identified; pixel data is not decoded here
+        except Exception:  # not a bare `except:`, so KeyboardInterrupt/SystemExit still abort the scan
+            print(f'WARNING: Image cannot be opened: {fp}')
+            return False
+
+    def __extended_validate(self, fp: str) -> bool:
+        try:
+            Image.open(fp).load()  # load() decodes the pixel data, so truncated/corrupt files fail here
+            return True
+        except (OSError) as error:
+            if 'truncated' in str(error):
+                print(f'WARNING: Image truncated: {error}')
+                return False
+            print(f'WARNING: Image cannot be opened: {error}')
+            return False
+        except Exception as error:  # bind the exception: a bare `except:` left `error` undefined (NameError)
+            print(f'WARNING: Image cannot be opened: {error}')
+            return False
+
+    def __no_op(self, fp: str) -> bool:
+        return True
+
+class Resize():
+    def __init__(self, is_resizing: bool, is_migrating: bool) -> None:
+        if not is_resizing:
+            self.resize = self.__no_op
+            return
+
+        if is_migrating:
+            self.resize = self.__migration
+            dataset_path = os.path.split(args.dataset)
+            self.__directory = os.path.join(
+                dataset_path[0],
+                f'{dataset_path[1]}_cropped'
+            )
+            os.makedirs(self.__directory, exist_ok=True)
+            return print(f"Resizing: Performing migration to '{self.__directory}'.")
+
+        self.resize = self.__no_migration
+
+    def __no_migration(self, image_path: str, w: int, h: int) -> Img:
+        with Image.open(image_path) as source:  # close the source file once the fitted RGB copy exists
+            return ImageOps.fit(
+                source, (w, h),
+                bleed=0.0,
+                centering=(0.5, 0.5),
+                method=Image.Resampling.LANCZOS
+            ).convert(mode='RGB')
+
+    def __migration(self, image_path: str, w: int, h: int) -> Img:
+        # Fit the image to (w, h), save it as <name>.jpg in the migration directory with a
+        # matching <name>.txt caption (copied, or created empty if missing), and return the image.
+        filename = re.sub(r'\.[^/.]+$', '', os.path.split(image_path)[1])  # raw string: '\.' is not a valid str escape
+
+        # Context manager closes the source file once the fitted RGB copy has been produced.
+        with Image.open(image_path) as source:
+            image = ImageOps.fit(
+                source,
+                (w, h),
+                bleed=0.0,
+                centering=(0.5, 0.5),
+                method=Image.Resampling.LANCZOS
+            ).convert(mode='RGB')
+
+        # Always written with a .jpg extension, whatever the source file's extension was.
+        image.save(os.path.join(self.__directory, f'{filename}.jpg'), optimize=True)
+
+        caption_path = os.path.join(self.__directory, f'{filename}.txt')
+        try:
+            shutil.copy(
+                os.path.join(args.dataset, f'{filename}.txt'),
+                caption_path,
+                follow_symlinks=False
+            )
+        except FileNotFoundError:
+            # No caption beside the source image: leave an empty one in the migration directory.
+            with open(caption_path, 'w', encoding='UTF-8'):
+                pass
+
+        return image
+
+    def __no_op(self, image_path: str, w: int, h: int) -> Img:
+        return Image.open(image_path).convert(mode='RGB')  # w/h unused; keeps the RGB conversion get_image() did before this patch
+
 class ImageStore:
     def __init__(self, data_dir: str) -> None:
         self.data_dir = data_dir
 
         self.image_files = []
         [self.image_files.extend(glob.glob(f'{data_dir}' + '/*.' + e)) for e in ['jpg', 'jpeg', 'png', 'bmp', 'webp']]
+
+        self.validator = Validation(
+            args.skip_validation,
+            args.extended_validation
+        )
+
+        self.resizer = Resize(args.resize, args.data_migration)
+
         self.image_files = [x for x in self.image_files if self.__valid_file(x)]
 
     def __len__(self) -> int:
         return len(self.image_files)
 
     def __valid_file(self, f) -> bool:
-        try:
-            Image.open(f)
-            return True
-        except:
-            print(f'WARNING: Unable to open file: {f}')
-            return False
+        return self.validator.validate(f)
+
+
 
     # iterator returns images as PIL images and their index in the store
-    def entries_iterator(self) -> Generator[Tuple[Image.Image, int], None, None]:
+    def entries_iterator(self) -> Generator[Tuple[Img, int], None, None]:
         for f in range(len(self)):
-            yield Image.open(self.image_files[f]).convert(mode='RGB'), f
+            yield Image.open(self.image_files[f]), f
 
     # get image by index
-    def get_image(self, ref: Tuple[int, int, int]) -> Image.Image:
-        return Image.open(self.image_files[ref[0]]).convert(mode='RGB')
+    def get_image(self, ref: Tuple[int, int, int]) -> Img:
+        return self.resizer.resize(
+            self.image_files[ref[0]],
+            ref[1],
+            ref[2]
+        )
 
     # gets caption by removing the extension from the filename and replacing it with .txt
     def get_caption(self, ref: Tuple[int, int, int]) -> str:
-        filename = re.sub('\.[^/.]+$', '', self.image_files[ref[0]]) + '.txt'
-        with open(filename, 'r', encoding='UTF-8') as f:
-            return f.read()
+        #filename = re.sub('\.[^/.]+$', '', self.image_files[ref[0]]) + '.txt'
+        #with open(filename, 'r', encoding='UTF-8') as f:
+            return ''
 
 
 # ====================================== #
@@ -403,15 +523,6 @@ class AspectDataset(torch.utils.data.Dataset):
 
         image_file = self.store.get_image(item)
 
-        if args.resize:
-            image_file = ImageOps.fit(
-                image_file,
-                (item[1], item[2]),
-                bleed=0.0,
-                centering=(0.5, 0.5),
-                method=Image.Resampling.LANCZOS
-            )
-
         return_dict['pixel_values'] = self.transforms(image_file)
         if random.random() > self.ucg:
             caption_file = self.store.get_caption(item)
@@ -616,6 +727,16 @@ def main():
         collate_fn=dataset.collate_fn
     )
 
+    # Validate dataset and perform possible migration
+    for _, batch in enumerate(train_dataloader):
+        continue
+
+    store.validator.completed()
+
+    if args.resize and args.migration:
+        print(f"Completed resize and migration to '{args.dataset}_cropped' please relaunch the trainer without the --resize argument and train on the migrated dataset.")
+        exit(0)
+
     weight_dtype = torch.float16 if args.fp16 else torch.float32
 
     # move models to device

From 6480336d2cd1879682b5b36b7cd0941d895c85c4 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Fri, 11 Nov 2022 17:17:50 -0700
Subject: [PATCH 02/14] Cleanup test code.

---
 trainer/diffusers_trainer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index 7cad170..34adf60 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -299,9 +299,9 @@ class ImageStore:
 
     # gets caption by removing the extension from the filename and replacing it with .txt
     def get_caption(self, ref: Tuple[int, int, int]) -> str:
-        #filename = re.sub('\.[^/.]+$', '', self.image_files[ref[0]]) + '.txt'
-        #with open(filename, 'r', encoding='UTF-8') as f:
-            return ''
+        filename = re.sub('\.[^/.]+$', '', self.image_files[ref[0]]) + '.txt'
+        with open(filename, 'r', encoding='UTF-8') as f:
+            return f.read()
 
 
 # ====================================== #

From c12cbfced3059dd2d80230ced7f8db0cfab71ae4 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Fri, 11 Nov 2022 17:43:09 -0700
Subject: [PATCH 03/14] Fixed ref typo.

---
 trainer/diffusers_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index 34adf60..2227d83 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -733,7 +733,7 @@ def main():
 
     store.validator.completed()
 
-    if args.resize and args.migration:
+    if args.resize and args.data_migration:
         print(f"Completed resize and migration to '{args.dataset}_cropped' please relaunch the trainer without the --resize argument and train on the migrated dataset.")
         exit(0)
 

From 925eacf374a2124e49cb338e16ae5d02e4d14f31 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Fri, 11 Nov 2022 17:50:23 -0700
Subject: [PATCH 04/14] Cleanup

---
 trainer/diffusers_trainer.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index 2227d83..8db12ab 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -282,8 +282,6 @@ class ImageStore:
     def __valid_file(self, f) -> bool:
         return self.validator.validate(f)
 
-
-
     # iterator returns images as PIL images and their index in the store
     def entries_iterator(self) -> Generator[Tuple[Img, int], None, None]:
         for f in range(len(self)):

From de221ea42ef62e7cf45016cb2f6df1a8023bcb66 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Fri, 11 Nov 2022 17:59:32 -0700
Subject: [PATCH 05/14] Derp. ImageStore.__init__ already iterates fully :)

---
 trainer/diffusers_trainer.py | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index 8db12ab..c04bec2 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -169,10 +169,6 @@ class Validation():
         self.validate = self.__validate
         print("Validation: Standard")
 
-    def completed(self) -> None:
-        self.validate = self.__no_op
-        return print('Validation complete. Skipping further validation.')
-
     def __validate(self, fp: str) -> bool:
         try:
             Image.open(fp)
@@ -724,14 +720,11 @@ def main():
         num_workers=0,
         collate_fn=dataset.collate_fn
     )
-
-    # Validate dataset and perform possible migration
-    for _, batch in enumerate(train_dataloader):
-        continue
-
-    store.validator.completed()
-
+    
+    # Migrate dataset
     if args.resize and args.data_migration:
+        for _, batch in enumerate(train_dataloader):
+            continue
         print(f"Completed resize and migration to '{args.dataset}_cropped' please relaunch the trainer without the --resize argument and train on the migrated dataset.")
         exit(0)
 

From 6c2d5d80664198fcc103fcf579874be2cdcdec3f Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Fri, 11 Nov 2022 18:09:09 -0700
Subject: [PATCH 06/14] Final cleanup.

---
 trainer/diffusers_trainer.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index c04bec2..d42e9af 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -266,18 +266,15 @@ class ImageStore:
         self.validator = Validation(
             args.skip_validation,
             args.extended_validation
-        )
+        ).validate
 
-        self.resizer = Resize(args.resize, args.data_migration)
+        self.resizer = Resize(args.resize, args.data_migration).resize
 
-        self.image_files = [x for x in self.image_files if self.__valid_file(x)]
+        self.image_files = [x for x in self.image_files if self.validator(x)]
 
     def __len__(self) -> int:
         return len(self.image_files)
 
-    def __valid_file(self, f) -> bool:
-        return self.validator.validate(f)
-
     # iterator returns images as PIL images and their index in the store
     def entries_iterator(self) -> Generator[Tuple[Img, int], None, None]:
         for f in range(len(self)):
@@ -285,7 +282,7 @@ class ImageStore:
 
     # get image by index
     def get_image(self, ref: Tuple[int, int, int]) -> Img:
-        return self.resizer.resize(
+        return self.resizer(
             self.image_files[ref[0]],
             ref[1],
             ref[2]

From d1eb3ace3f945499ab251cf4f5a0b38e66808770 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Fri, 11 Nov 2022 18:17:29 -0700
Subject: [PATCH 07/14] I lied.

---
 trainer/diffusers_trainer.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index d42e9af..b371919 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -159,8 +159,7 @@ class Validation():
     def __init__(self, is_skipped: bool, is_extended: bool) -> None:
         if is_skipped:
             self.validate = self.__no_op
-            print("Validation: Skipped")
-            return 
+            return print("Validation: Skipped")
 
         if is_extended:
             self.validate = self.__extended_validate

From 189f621a1ed83e78c57dea436eefb3f853c7ccdb Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Sat, 12 Nov 2022 15:47:17 -0700
Subject: [PATCH 08/14] Here, let's fix this while we're at it.

---
 trainer/diffusers_trainer.py | 42 ++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index b371919..4bfce2f 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -48,6 +48,9 @@ torch.backends.cuda.matmul.allow_tf32 = True
 
 # defaults should be good for everyone
 # TODO: add custom VAE support. should be simple with diffusers
+# use action='store_true' when looking for boolean values so the arguments are treated like flags (as expected)
+# just keep in mind that action='store_false' is logically flipped from the default:
+# ('--foo', action='store_false') returns False when the flag exists, and True if it does not.
 parser = argparse.ArgumentParser(description='Stable Diffusion Finetuner')
 parser.add_argument('--model', type=str, default=None, required=True, help='The name of the model to use for finetuning. Could be HuggingFace ID or a directory')
 parser.add_argument('--resume', type=str, default=None, help='The path to the checkpoint to resume from. If not specified, will create a new run.')
@@ -59,10 +62,10 @@ parser.add_argument('--bucket_side_max', type=int, default=768, help='The maximu
 parser.add_argument('--lr', type=float, default=5e-6, help='Learning rate')
 parser.add_argument('--epochs', type=int, default=10, help='Number of epochs to train for')
 parser.add_argument('--batch_size', type=int, default=1, help='Batch size')
-parser.add_argument('--use_ema', type=str, default='False', help='Use EMA for finetuning')
+parser.add_argument('--use_ema', action='store_true', help='Use EMA for finetuning')
 parser.add_argument('--ucg', type=float, default=0.1, help='Percentage chance of dropping out the text condition per batch. Ranges from 0.0 to 1.0 where 1.0 means 100% text condition dropout.') # 10% dropout probability
-parser.add_argument('--gradient_checkpointing', dest='gradient_checkpointing', type=str, default='False', help='Enable gradient checkpointing')
-parser.add_argument('--use_8bit_adam', dest='use_8bit_adam', type=str, default='False', help='Use 8-bit Adam optimizer')
+parser.add_argument('--gradient_checkpointing', dest='gradient_checkpointing', action='store_true', help='Enable gradient checkpointing')
+parser.add_argument('--use_8bit_adam', dest='use_8bit_adam', action='store_true', help='Use 8-bit Adam optimizer')
 parser.add_argument('--adam_beta1', type=float, default=0.9, help='Adam beta1')
 parser.add_argument('--adam_beta2', type=float, default=0.999, help='Adam beta2')
 parser.add_argument('--adam_weight_decay', type=float, default=1e-2, help='Adam weight decay')
@@ -73,31 +76,24 @@ parser.add_argument('--seed', type=int, default=42, help='Seed for random number
 parser.add_argument('--output_path', type=str, default='./output', help='Root path for all outputs.')
 parser.add_argument('--save_steps', type=int, default=500, help='Number of steps to save checkpoints at.')
 parser.add_argument('--resolution', type=int, default=512, help='Image resolution to train against. Lower res images will be scaled up to this resolution and higher res images will be scaled down.')
-parser.add_argument('--shuffle', dest='shuffle', type=str, default='True', help='Shuffle dataset')
+parser.add_argument('--shuffle', dest='shuffle', action='store_true', help='Shuffle dataset')
 parser.add_argument('--hf_token', type=str, default=None, required=False, help='A HuggingFace token is needed to download private models for training.')
 parser.add_argument('--project_id', type=str, default='diffusers', help='Project ID for reporting to WandB')
-parser.add_argument('--fp16', dest='fp16', type=str, default='False', help='Train in mixed precision')
+parser.add_argument('--fp16', dest='fp16', action='store_true', help='Train in mixed precision')
 parser.add_argument('--image_log_steps', type=int, default=100, help='Number of steps to log images at.')
 parser.add_argument('--image_log_amount', type=int, default=4, help='Number of images to log every image_log_steps')
 parser.add_argument('--image_log_inference_steps', type=int, default=50, help='Number of inference steps to use to log images.')
 parser.add_argument('--image_log_scheduler', type=str, default="PNDMScheduler", help='Number of inference steps to use to log images.')
-parser.add_argument('--clip_penultimate', type=str, default='False', help='Use penultimate CLIP layer for text embedding')
-parser.add_argument('--output_bucket_info', type=str, default='False', help='Outputs bucket information and exits')
-parser.add_argument('--resize', type=str, default='False', help="Resizes dataset's images to the appropriate bucket dimensions.")
-parser.add_argument('--use_xformers', type=str, default='False', help='Use memory efficient attention')
-parser.add_argument('--extended_validation', type=str, default='False', help='Perform extended validation of images to catch truncated or corrupt images.')
-parser.add_argument('--data_migration', type=str, default='True', help='Perform migration of resized images into a directory relative to the dataset path. Saves into `<dataset_directory_name>_cropped`.')
-parser.add_argument('--skip_validation', type=str, default='False', help='Skip validation of images, useful for speeding up loading of very large datasets that have already been validated.')
+parser.add_argument('--clip_penultimate', action='store_true', help='Use penultimate CLIP layer for text embedding')
+parser.add_argument('--output_bucket_info', action='store_true', help='Outputs bucket information and exits')
+parser.add_argument('--resize', action='store_true', help="Resizes dataset's images to the appropriate bucket dimensions.")
+parser.add_argument('--use_xformers', action='store_true', help='Use memory efficient attention')
+parser.add_argument('--extended_validation', action='store_true', help='Perform extended validation of images to catch truncated or corrupt images.')
+parser.add_argument('--no_migration', action='store_true', help='Perform migration of resized images into a directory relative to the dataset path. Saves into `<dataset_directory_name>_cropped`.')
+parser.add_argument('--skip_validation', action='store_true', help='Skip validation of images, useful for speeding up loading of very large datasets that have already been validated.')
 
 args = parser.parse_args()
 
-for arg in vars(args):
-    if type(getattr(args, arg)) == str:
-        if getattr(args, arg).lower() == 'true':
-            setattr(args, arg, True)
-        elif getattr(args, arg).lower() == 'false':
-            setattr(args, arg, False)
-
 def setup():
     torch.distributed.init_process_group("nccl", init_method="env://")
 
@@ -194,12 +190,12 @@ class Validation():
         return True
 
 class Resize():
-    def __init__(self, is_resizing: bool, is_migrating: bool) -> None:
+    def __init__(self, is_resizing: bool, is_not_migrating: bool) -> None:
         if not is_resizing:
             self.resize = self.__no_op
             return
 
-        if is_migrating:
+        if not is_not_migrating:
             self.resize = self.__migration
             dataset_path = os.path.split(args.dataset)
             self.__directory = os.path.join(
@@ -267,7 +263,7 @@ class ImageStore:
             args.extended_validation
         ).validate
 
-        self.resizer = Resize(args.resize, args.data_migration).resize
+        self.resizer = Resize(args.resize, args.no_migration).resize
 
         self.image_files = [x for x in self.image_files if self.validator(x)]
 
@@ -718,7 +714,7 @@ def main():
     )
     
     # Migrate dataset
-    if args.resize and args.data_migration:
+    if args.resize and not args.no_migration:
         for _, batch in enumerate(train_dataloader):
             continue
         print(f"Completed resize and migration to '{args.dataset}_cropped' please relaunch the trainer without the --resize argument and train on the migrated dataset.")

From 6bd6c6a4ef81518e8ed603d20b30e4e5a2efbd6d Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Sat, 12 Nov 2022 16:11:30 -0700
Subject: [PATCH 09/14] Fixed/flipped help text.

---
 trainer/diffusers_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index 4bfce2f..33c522c 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -89,7 +89,7 @@ parser.add_argument('--output_bucket_info', action='store_true', help='Outputs b
 parser.add_argument('--resize', action='store_true', help="Resizes dataset's images to the appropriate bucket dimensions.")
 parser.add_argument('--use_xformers', action='store_true', help='Use memory efficient attention')
 parser.add_argument('--extended_validation', action='store_true', help='Perform extended validation of images to catch truncated or corrupt images.')
-parser.add_argument('--no_migration', action='store_true', help='Perform migration of resized images into a directory relative to the dataset path. Saves into `<dataset_directory_name>_cropped`.')
+parser.add_argument('--no_migration', action='store_true', help='Do not perform migration of dataset while the `--resize` flag is active. Migration creates an adjacent folder to the dataset with <dataset_dirname>_cropped.')
 parser.add_argument('--skip_validation', action='store_true', help='Skip validation of images, useful for speeding up loading of very large datasets that have already been validated.')
 
 args = parser.parse_args()

From 95b9407a3e1f13f8b3f71dfb2e9efa236d7a1567 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Sat, 12 Nov 2022 18:48:16 -0700
Subject: [PATCH 10/14] Add+config .gitignore (bring back git stage) and fix up
 documentation.

---
 .gitignore                   | 10 ++++++++++
 trainer/diffusers_trainer.py |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a454386
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+aesthetic/**
+*_cropped/
+*_cropped/**
+**/nsfw-ids.txt
+**/*.image
+**/*.caption
+**/dataset*.tar
+*/**/*.json
+**/*.png
+**/*.jpg
diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index 33c522c..2c6072c 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -2,8 +2,8 @@
 # `nvcc --version` to get CUDA version.
 # `pip install -i https://test.pypi.org/simple/ bitsandbytes-cudaXXX` to install for current CUDA.
 # Example Usage:
-# Single GPU: torchrun --nproc_per_node=1 trainer_dist.py --model="CompVis/stable-diffusion-v1-4" --run_name="liminal" --dataset="liminal-dataset" --hf_token="hf_blablabla" --bucket_side_min=64 --use_8bit_adam=True --gradient_checkpointing=True --batch_size=10 --fp16=True --image_log_steps=250 --epochs=20 --resolution=768 --use_ema=True
-# Multiple GPUs: torchrun --nproc_per_node=N trainer_dist.py --model="CompVis/stable-diffusion-v1-4" --run_name="liminal" --dataset="liminal-dataset" --hf_token="hf_blablabla" --bucket_side_min=64 --use_8bit_adam=True --gradient_checkpointing=True --batch_size=10 --fp16=True --image_log_steps=250 --epochs=20 --resolution=768 --use_ema=True
+# Single GPU: torchrun --nproc_per_node=1 trainer/diffusers_trainer.py --model="CompVis/stable-diffusion-v1-4" --run_name="liminal" --dataset="liminal-dataset" --hf_token="hf_blablabla" --bucket_side_min=64 --use_8bit_adam --gradient_checkpointing --batch_size=1 --fp16 --image_log_steps=250 --epochs=20 --resolution=768 --use_ema
+# Multiple GPUs: torchrun --nproc_per_node=N trainer/diffusers_trainer.py --model="CompVis/stable-diffusion-v1-4" --run_name="liminal" --dataset="liminal-dataset" --hf_token="hf_blablabla" --bucket_side_min=64 --use_8bit_adam --gradient_checkpointing --batch_size=10 --fp16 --image_log_steps=250 --epochs=20 --resolution=768 --use_ema
 
 import argparse
 import socket

From 4943d978c128407d561d7d785ea03a25829e8b10 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Sun, 13 Nov 2022 08:22:44 -0700
Subject: [PATCH 11/14] Fix redundancies.

---
 .gitignore | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
index a454386..ff051ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,9 @@
 aesthetic/**
-*_cropped/
-*_cropped/**
+**/*_cropped
 **/nsfw-ids.txt
 **/*.image
 **/*.caption
 **/dataset*.tar
-*/**/*.json
+**/*.json
 **/*.png
 **/*.jpg

From 978dd45072866ae2781e411979afb7d5c13eea45 Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Sun, 13 Nov 2022 08:24:40 -0700
Subject: [PATCH 12/14] Fix.

---
 .gitignore | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index ff051ed..3d8e00d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
-aesthetic/**
-**/*_cropped
+*_cropped/
 **/nsfw-ids.txt
 **/*.image
 **/*.caption

From 2c18d29613d22b8b74e07b929c079c34d010a80e Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Tue, 15 Nov 2022 06:42:14 -0700
Subject: [PATCH 13/14] Fix from upstream merge.

---
 trainer/diffusers_trainer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index dc2508a..d8e9f47 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -86,6 +86,10 @@ parser.add_argument('--clip_penultimate', type=bool_t, default='False', help='Us
 parser.add_argument('--output_bucket_info', type=bool_t, default='False', help='Outputs bucket information and exits')
 parser.add_argument('--resize', type=bool_t, default='False', help="Resizes dataset's images to the appropriate bucket dimensions.")
 parser.add_argument('--use_xformers', type=bool_t, default='False', help='Use memory efficient attention')
+parser.add_argument('--extended_validation', action='store_true', help='Perform extended validation of images to catch truncated or corrupt images.')
+parser.add_argument('--no_migration', action='store_true', help='Do not perform migration of dataset while the `--resize` flag is active. Migration creates an adjacent folder to the dataset with <dataset_dirname>_cropped.')
+parser.add_argument('--skip_validation', action='store_true', help='Skip validation of images, useful for speeding up loading of very large datasets that have already been validated.')
+
 args = parser.parse_args()
 
 def setup():

From 6c5b2e71496ee9b066fac82331779360a14b939e Mon Sep 17 00:00:00 2001
From: Maw-Fox <huskyz0rz@gmail.com>
Date: Tue, 15 Nov 2022 07:15:18 -0700
Subject: [PATCH 14/14] Fix of fix

---
 trainer/diffusers_trainer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/trainer/diffusers_trainer.py b/trainer/diffusers_trainer.py
index d8e9f47..9a6e2b7 100644
--- a/trainer/diffusers_trainer.py
+++ b/trainer/diffusers_trainer.py
@@ -86,9 +86,9 @@ parser.add_argument('--clip_penultimate', type=bool_t, default='False', help='Us
 parser.add_argument('--output_bucket_info', type=bool_t, default='False', help='Outputs bucket information and exits')
 parser.add_argument('--resize', type=bool_t, default='False', help="Resizes dataset's images to the appropriate bucket dimensions.")
 parser.add_argument('--use_xformers', type=bool_t, default='False', help='Use memory efficient attention')
-parser.add_argument('--extended_validation', action='store_true', help='Perform extended validation of images to catch truncated or corrupt images.')
-parser.add_argument('--no_migration', action='store_true', help='Do not perform migration of dataset while the `--resize` flag is active. Migration creates an adjacent folder to the dataset with <dataset_dirname>_cropped.')
-parser.add_argument('--skip_validation', action='store_true', help='Skip validation of images, useful for speeding up loading of very large datasets that have already been validated.')
+parser.add_argument('--extended_validation', type=bool_t, default='False', help='Perform extended validation of images to catch truncated or corrupt images.')
+parser.add_argument('--no_migration', type=bool_t, default='False', help='Do not perform migration of dataset while the `--resize` flag is active. Migration creates an adjacent folder to the dataset with <dataset_dirname>_cropped.')
+parser.add_argument('--skip_validation', type=bool_t, default='False', help='Skip validation of images, useful for speeding up loading of very large datasets that have already been validated.')
 
 args = parser.parse_args()