Merge branch 'master' into master

2022-08-24 16:50:57 +03:00 · 2022-08-24 16:50:57 +03:00 · 15a700bbe5
parent abb83239e5 34e9795505
commit 15a700bbe5
3 changed files with 85 additions and 26 deletions
--- a/README.md
+++ b/README.md
@ -133,3 +133,16 @@ the same effect. Use the --no-progressbar-hiding commandline option to revert th
 ### Prompt validation
 Stable Diffusion has a limit for input text length. If your prompt is too long, you will get a
 warning in the text output field, showing which parts of your text were truncated and ignored by the model.
 ### Loopback
 A checkbox for img2img allowing to automatically feed output image as input for the next batch. Equivalent to
 saving output image, and replacing input image with it. Batch count setting controls how many iterations of
 this you get.
 Usually, when doing this, you would choose one of many images for the next iteration yourself, so the usefulness
 of this feature may be questionable, but I've managed to get some very nice outputs with it that I wasn't abble
 to get otherwise.
 Example: (cherrypicked result; original picture by anon)
 ![](images/loopback.jpg)
--- a/images/loopback.jpg
+++ b/images/loopback.jpg
--- a/webui.py
+++ b/webui.py
@ -49,6 +49,8 @@ parser.add_argument("--gfpgan-dir", type=str, help="GFPGAN directory", default=(
 parser.add_argument("--no-verify-input", action='store_true', help="do not verify input to check if it's too long")
 parser.add_argument("--no-half", action='store_true', help="do not switch the model to 16-bit floats")
 parser.add_argument("--no-progressbar-hiding", action='store_true', help="do not hide progressbar in gradio UI (we hide it because it slows down ML if you have hardware accleration in browser)")
 parser.add_argument("--max-batch-count",  type=int, default=16, help="maximum batch count value for the UI")
 parser.add_argument("--grid-format",  type=str, default='png', help="file format for saved grids; can be png or jpg")
 opt = parser.parse_args()
 GFPGAN_dir = opt.gfpgan_dir
@ -159,8 +161,10 @@ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cp
 model = (model if opt.no_half else model.half()).to(device)
-def image_grid(imgs, batch_size, round_down=False):
+def image_grid(imgs, batch_size, round_down=False, force_n_rows=None):
-    if opt.n_rows > 0:
+    if force_n_rows is not None:
        rows = force_n_rows
    elif opt.n_rows > 0:
        rows = opt.n_rows
    elif opt.n_rows == 0:
        rows = batch_size
@ -299,7 +303,7 @@ def check_prompt_length(prompt, comments):
    comments.append(f"Warning: too many input tokens; some ({len(overflowing_words)}) have been truncated:\n{overflowing_text}\n")
-def process_images(outpath, func_init, func_sample, prompt, seed, sampler_name, batch_size, n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN):
+def process_images(outpath, func_init, func_sample, prompt, seed, sampler_name, batch_size, n_iter, steps, cfg_scale, width, height, prompt_matrix, use_GFPGAN, do_not_save_grid=False):
    """this is the main loop that both txt2img and img2img use; it calls func_init once inside all the scopes and func_sample once per batch"""
    assert prompt is not None
@ -390,7 +394,7 @@ def process_images(outpath, func_init, func_sample, prompt, seed, sampler_name,
                    output_images.append(image)
                    base_count += 1
-        if prompt_matrix or not opt.skip_grid:
+        if (prompt_matrix or not opt.skip_grid) and not do_not_save_grid:
            grid = image_grid(output_images, batch_size, round_down=prompt_matrix)
            if prompt_matrix:
@ -404,8 +408,8 @@ def process_images(outpath, func_init, func_sample, prompt, seed, sampler_name,
                output_images.insert(0, grid)
-            grid_file = f"grid-{grid_count:05}-{seed}_{prompts[i].replace(' ', '_').translate({ord(x): '' for x in invalid_filename_chars})[:128]}.jpg"
+
-            grid.save(os.path.join(outpath, grid_file), 'jpeg', quality=80, optimize=True)
+            grid.save(os.path.join(outpath, f'grid-{grid_count:04}.{opt.grid_format}'))
            grid_count += 1
    info = f"""
@ -510,7 +514,7 @@ txt2img_interface = gr.Interface(
        gr.Checkbox(label='Fix faces using GFPGAN', value=False, visible=GFPGAN is not None),
        gr.Checkbox(label='Create prompt matrix (separate multiple prompts using |, and get all combinations of them)', value=False),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label="DDIM ETA", value=0.0, visible=False),
-        gr.Slider(minimum=1, maximum=16, step=1, label='Batch count (how many batches of images to generate)', value=1),
+        gr.Slider(minimum=1, maximum=opt.max_batch_count, step=1, label='Batch count (how many batches of images to generate)', value=1),
        gr.Slider(minimum=1, maximum=8, step=1, label='Batch size (how many images are in a batch; memory-hungry)', value=1),
        gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='Classifier Free Guidance Scale (how strongly the image should follow the prompt)', value=7.0),
        gr.Number(label='Seed', value=-1),
@ -528,13 +532,12 @@ txt2img_interface = gr.Interface(
 )
-def img2img(prompt: str, init_img, ddim_steps: int, use_GFPGAN: bool, prompt_matrix, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int):
+def img2img(prompt: str, init_img, ddim_steps: int, use_GFPGAN: bool, prompt_matrix, loopback: bool, n_iter: int, batch_size: int, cfg_scale: float, denoising_strength: float, seed: int, height: int, width: int, resize_mode: int):
    outpath = opt.outdir or "outputs/img2img-samples"
    sampler = KDiffusionSampler(model)
    assert 0. <= denoising_strength <= 1., 'can only work with strength in [0.0, 1.0]'
    t_enc = int(denoising_strength * ddim_steps)
    def init():
        image = init_img.convert("RGB")
@ -551,6 +554,8 @@ def img2img(prompt: str, init_img, ddim_steps: int, use_GFPGAN: bool, prompt_mat
        return init_latent,
    def sample(init_data, x, conditioning, unconditional_conditioning):
        t_enc = int(denoising_strength * ddim_steps)
        x0, = init_data
        sigmas = sampler.model_wrap.get_sigmas(ddim_steps)
@ -562,22 +567,62 @@ def img2img(prompt: str, init_img, ddim_steps: int, use_GFPGAN: bool, prompt_mat
        samples_ddim = K.sampling.sample_lms(model_wrap_cfg, xi, sigma_sched, extra_args={'cond': conditioning, 'uncond': unconditional_conditioning, 'cond_scale': cfg_scale}, disable=False)
        return samples_ddim
-    output_images, seed, info = process_images(
+    if loopback:
-        outpath=outpath,
+        output_images, info = None, None
-        func_init=init,
+        history = []
-        func_sample=sample,
+        initial_seed = None
-        prompt=prompt,
+
-        seed=seed,
+        for i in range(n_iter):
-        sampler_name='k-diffusion',
+            output_images, seed, info = process_images(
-        batch_size=batch_size,
+                outpath=outpath,
-        n_iter=n_iter,
+                func_init=init,
-        steps=ddim_steps,
+                func_sample=sample,
-        cfg_scale=cfg_scale,
+                prompt=prompt,
-        width=width,
+                seed=seed,
-        height=height,
+                sampler_name='k-diffusion',
-        prompt_matrix=prompt_matrix,
+                batch_size=1,
-        use_GFPGAN=use_GFPGAN
+                n_iter=1,
-    )
+                steps=ddim_steps,
                cfg_scale=cfg_scale,
                width=width,
                height=height,
                prompt_matrix=prompt_matrix,
                use_GFPGAN=use_GFPGAN,
                do_not_save_grid=True
            )
            if initial_seed is None:
                initial_seed = seed
            init_img = output_images[0]
            seed = seed + 1
            denoising_strength = max(denoising_strength * 0.95, 0.1)
            history.append(init_img)
        grid_count = len(os.listdir(outpath)) - 1
        grid = image_grid(history, batch_size, force_n_rows=1)
        grid.save(os.path.join(outpath, f'grid-{grid_count:04}.{opt.grid_format}'))
        output_images = history
        seed = initial_seed
    else:
        output_images, seed, info = process_images(
            outpath=outpath,
            func_init=init,
            func_sample=sample,
            prompt=prompt,
            seed=seed,
            sampler_name='k-diffusion',
            batch_size=batch_size,
            n_iter=n_iter,
            steps=ddim_steps,
            cfg_scale=cfg_scale,
            width=width,
            height=height,
            prompt_matrix=prompt_matrix,
            use_GFPGAN=use_GFPGAN
        )
    del sampler
@ -595,7 +640,8 @@ img2img_interface = gr.Interface(
        gr.Slider(minimum=1, maximum=150, step=1, label="Sampling Steps", value=50),
        gr.Checkbox(label='Fix faces using GFPGAN', value=False, visible=GFPGAN is not None),
        gr.Checkbox(label='Create prompt matrix (separate multiple prompts using |, and get all combinations of them)', value=False),
-        gr.Slider(minimum=1, maximum=16, step=1, label='Batch count (how many batches of images to generate)', value=1),
+        gr.Checkbox(label='Loopback (use images from previous batch when creating next batch)', value=False),
        gr.Slider(minimum=1, maximum=opt.max_batch_count, step=1, label='Batch count (how many batches of images to generate)', value=1),
        gr.Slider(minimum=1, maximum=8, step=1, label='Batch size (how many images are in a batch; memory-hungry)', value=1),
        gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='Classifier Free Guidance Scale (how strongly the image should follow the prompt)', value=7.0),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising Strength', value=0.75),