Add files via upload
This commit is contained in:
parent 049f79a3da
commit 2767e526d0
@@ -0,0 +1,556 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0cf778b6-9d7f-45ed-8838-93df28213e7a",
"metadata": {},
"outputs": [],
"source": [
"input_images_dir = '/notebooks/textual inversion/source/' # upload your images here\n",
"hf_auth_token = '' # your Hugging Face access token, used to download the model weights"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9a306d5-a088-4fc1-800c-1e1ef57cd75c",
"metadata": {},
"outputs": [],
"source": [
"!mkdir -p \"{input_images_dir}\"\n",
"# Install the required libraries\n",
"!pip install --upgrade diffusers transformers scipy ftfy accelerate\n",
"!pip install \"ipywidgets>=7,<8\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48dcdb96-e9a6-4141-8442-5eba0280c31a",
"metadata": {},
"outputs": [],
"source": [
"# Import required libraries\n",
"import itertools\n",
"import math\n",
"import os\n",
"import random\n",
"\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn.functional as F\n",
"import torch.utils.checkpoint\n",
"from torch.utils.data import Dataset\n",
"\n",
"import PIL\n",
"from accelerate import Accelerator\n",
"from accelerate.logging import get_logger\n",
"from accelerate.utils import set_seed\n",
"from diffusers import AutoencoderKL, DDPMScheduler, PNDMScheduler, StableDiffusionPipeline, UNet2DConditionModel\n",
"from diffusers.optimization import get_scheduler\n",
"from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker\n",
"from PIL import Image\n",
"from torchvision import transforms\n",
"from tqdm.auto import tqdm\n",
"from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer\n",
"\n",
"def image_grid(imgs, rows, cols):\n",
"    # Paste the images into a single rows x cols grid for quick visual inspection\n",
"    assert len(imgs) == rows*cols\n",
"\n",
"    w, h = imgs[0].size\n",
"    grid = Image.new('RGB', size=(cols*w, rows*h))\n",
"\n",
"    for i, img in enumerate(imgs):\n",
"        grid.paste(img, box=(i%cols*w, i//cols*h))\n",
"    return grid"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "690c3700-0386-44cf-b8d2-1c9a8b85e296",
"metadata": {},
"outputs": [],
"source": [
"# Settings for your newly created concept\n",
"\n",
"# What is it that you are teaching?\n",
"# \"object\" teaches the model a new object to reproduce\n",
"# \"style\" teaches the model a new visual style to apply\n",
"what_to_teach = \"style\" # [\"object\", \"style\"]\n",
"\n",
"# `placeholder_token` is the token you will use to represent your new concept\n",
"# (so when you prompt the model, you will say \"A `<my-placeholder-token>` in an amusement park\")\n",
"# We use angle brackets to differentiate the token from other words/tokens and avoid collisions.\n",
"placeholder_token = \"<banana>\" #@param {type:\"string\"}\n",
"\n",
"# `initializer_token` is a word that summarises your new concept; its embedding is the starting point for training\n",
"initializer_token = \"art\" #@param {type:\"string\"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e866762e-d3e8-4977-99ad-d742114f2fe6",
"metadata": {},
"outputs": [],
"source": [
"def absolute_file_paths(directory):\n",
"    # Collect the absolute path of every file under `directory`\n",
"    paths = []\n",
"    for dirpath, _, filenames in os.walk(directory):\n",
"        for f in filenames:\n",
"            paths.append(os.path.abspath(os.path.join(dirpath, f)))\n",
"    return paths\n",
"\n",
"def load_image(path):\n",
"    # Open a local image file and force it into RGB mode\n",
"    return Image.open(path).convert(\"RGB\")\n",
"\n",
"save_path = '/notebooks/textual inversion/input/'\n",
"!rm -rf \"{save_path}\"\n",
"!mkdir -p \"{save_path}\"\n",
"\n",
"images = [load_image(path) for path in absolute_file_paths(input_images_dir)]\n",
"for i, image in enumerate(images):\n",
"    image.save(f\"{save_path}/{i}.jpeg\")"
]
},
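{
"cell_type": "code",
"execution_count": null,
"id": "preview-input-images-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (a minimal sketch, not part of the original notebook):\n",
"# preview the prepared training images with the `image_grid` helper defined above.\n",
"# Assumes the previous cell found at least one image in `input_images_dir`;\n",
"# the 256x256 thumbnail size is an arbitrary choice for display only.\n",
"n = min(len(images), 4)\n",
"image_grid([img.resize((256, 256)) for img in images[:n]], rows=1, cols=n)"
]
},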
{
"cell_type": "code",
"execution_count": null,
"id": "a882b1c8-97e3-4487-8300-083aa359ad1d",
"metadata": {},
"outputs": [],
"source": [
"#@title Setup the prompt templates for training\n",
"imagenet_templates_small = [\n",
"    \"a photo of a {}\",\n",
"    \"a rendering of a {}\",\n",
"    \"a cropped photo of the {}\",\n",
"    \"the photo of a {}\",\n",
"    \"a photo of a clean {}\",\n",
"    \"a photo of a dirty {}\",\n",
"    \"a dark photo of the {}\",\n",
"    \"a photo of my {}\",\n",
"    \"a photo of the cool {}\",\n",
"    \"a close-up photo of a {}\",\n",
"    \"a bright photo of the {}\",\n",
"    \"a cropped photo of a {}\",\n",
"    \"a photo of the {}\",\n",
"    \"a good photo of the {}\",\n",
"    \"a photo of one {}\",\n",
"    \"a close-up photo of the {}\",\n",
"    \"a rendition of the {}\",\n",
"    \"a photo of the clean {}\",\n",
"    \"a rendition of a {}\",\n",
"    \"a photo of a nice {}\",\n",
"    \"a good photo of a {}\",\n",
"    \"a photo of the nice {}\",\n",
"    \"a photo of the small {}\",\n",
"    \"a photo of the weird {}\",\n",
"    \"a photo of the large {}\",\n",
"    \"a photo of a cool {}\",\n",
"    \"a photo of a small {}\",\n",
"]\n",
"\n",
"imagenet_style_templates_small = [\n",
"    \"a painting in the style of {}\",\n",
"    \"a rendering in the style of {}\",\n",
"    \"a cropped painting in the style of {}\",\n",
"    \"the painting in the style of {}\",\n",
"    \"a clean painting in the style of {}\",\n",
"    \"a dirty painting in the style of {}\",\n",
"    \"a dark painting in the style of {}\",\n",
"    \"a picture in the style of {}\",\n",
"    \"a cool painting in the style of {}\",\n",
"    \"a close-up painting in the style of {}\",\n",
"    \"a bright painting in the style of {}\",\n",
"    \"a cropped painting in the style of {}\",\n",
"    \"a good painting in the style of {}\",\n",
"    \"a close-up painting in the style of {}\",\n",
"    \"a rendition in the style of {}\",\n",
"    \"a nice painting in the style of {}\",\n",
"    \"a small painting in the style of {}\",\n",
"    \"a weird painting in the style of {}\",\n",
"    \"a large painting in the style of {}\",\n",
"]\n",
"\n",
"#@title Setup the dataset\n",
"pretrained_model_name_or_path = 'CompVis/stable-diffusion-v1-4'\n",
"\n",
"class TextualInversionDataset(Dataset):\n",
"    def __init__(\n",
"        self,\n",
"        data_root,\n",
"        tokenizer,\n",
"        learnable_property=\"object\",  # [object, style]\n",
"        size=512,\n",
"        repeats=100,\n",
"        interpolation=\"bicubic\",\n",
"        flip_p=0.5,\n",
"        set=\"train\",\n",
"        placeholder_token=\"*\",\n",
"        center_crop=False,\n",
"    ):\n",
"\n",
"        self.data_root = data_root\n",
"        self.tokenizer = tokenizer\n",
"        self.learnable_property = learnable_property\n",
"        self.size = size\n",
"        self.placeholder_token = placeholder_token\n",
"        self.center_crop = center_crop\n",
"        self.flip_p = flip_p\n",
"\n",
"        self.image_paths = [os.path.join(self.data_root, file_path) for file_path in os.listdir(self.data_root)]\n",
"\n",
"        self.num_images = len(self.image_paths)\n",
"        self._length = self.num_images\n",
"\n",
"        if set == \"train\":\n",
"            self._length = self.num_images * repeats\n",
"\n",
"        self.interpolation = {\n",
"            \"linear\": PIL.Image.LINEAR,\n",
"            \"bilinear\": PIL.Image.BILINEAR,\n",
"            \"bicubic\": PIL.Image.BICUBIC,\n",
"            \"lanczos\": PIL.Image.LANCZOS,\n",
"        }[interpolation]\n",
"\n",
"        self.templates = imagenet_style_templates_small if learnable_property == \"style\" else imagenet_templates_small\n",
"        self.flip_transform = transforms.RandomHorizontalFlip(p=self.flip_p)\n",
"\n",
"    def __len__(self):\n",
"        return self._length\n",
"\n",
"    def __getitem__(self, i):\n",
"        example = {}\n",
"        image = Image.open(self.image_paths[i % self.num_images])\n",
"\n",
"        if not image.mode == \"RGB\":\n",
"            image = image.convert(\"RGB\")\n",
"\n",
"        placeholder_string = self.placeholder_token\n",
"        text = random.choice(self.templates).format(placeholder_string)\n",
"\n",
"        example[\"input_ids\"] = self.tokenizer(\n",
"            text,\n",
"            padding=\"max_length\",\n",
"            truncation=True,\n",
"            max_length=self.tokenizer.model_max_length,\n",
"            return_tensors=\"pt\",\n",
"        ).input_ids[0]\n",
"\n",
"        # default to score-sde preprocessing\n",
"        img = np.array(image).astype(np.uint8)\n",
"\n",
"        if self.center_crop:\n",
"            crop = min(img.shape[0], img.shape[1])\n",
"            h, w = img.shape[0], img.shape[1]\n",
"            img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2]\n",
"\n",
"        image = Image.fromarray(img)\n",
"        image = image.resize((self.size, self.size), resample=self.interpolation)\n",
"\n",
"        image = self.flip_transform(image)\n",
"        image = np.array(image).astype(np.uint8)\n",
"        image = (image / 127.5 - 1.0).astype(np.float32)\n",
"\n",
"        example[\"pixel_values\"] = torch.from_numpy(image).permute(2, 0, 1)\n",
"        return example\n",
"\n",
"#@title Load the tokenizer and add the placeholder token as an additional special token.\n",
"#@markdown If you hit an authorization error, read and accept the model LICENSE [here](https://huggingface.co/CompVis/stable-diffusion-v1-4)\n",
"tokenizer = CLIPTokenizer.from_pretrained(\n",
"    pretrained_model_name_or_path,\n",
"    subfolder=\"tokenizer\",\n",
"    use_auth_token=hf_auth_token,\n",
")\n",
"\n",
"# Add the placeholder token to the tokenizer\n",
"num_added_tokens = tokenizer.add_tokens(placeholder_token)\n",
"if num_added_tokens == 0:\n",
"    raise ValueError(\n",
"        f\"The tokenizer already contains the token {placeholder_token}. Please pass a different\"\n",
"        \" `placeholder_token` that is not already in the tokenizer.\"\n",
"    )\n",
"\n",
"#@title Get token ids for our placeholder and initializer token. This cell raises an error if the initializer string is not a single token\n",
"# Convert the initializer_token and placeholder_token to ids\n",
"token_ids = tokenizer.encode(initializer_token, add_special_tokens=False)\n",
"# Check if initializer_token is a single token or a sequence of tokens\n",
"if len(token_ids) > 1:\n",
"    raise ValueError(\"The initializer token must be a single token.\")\n",
"\n",
"initializer_token_id = token_ids[0]\n",
"placeholder_token_id = tokenizer.convert_tokens_to_ids(placeholder_token)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b8daf19-a6fa-4977-b176-d7d5200c81a1",
"metadata": {},
"outputs": [],
"source": [
"#@title Load the Stable Diffusion model\n",
"# Load models and create wrapper for stable diffusion\n",
"text_encoder = CLIPTextModel.from_pretrained(\n",
"    pretrained_model_name_or_path, subfolder=\"text_encoder\", use_auth_token=hf_auth_token\n",
")\n",
"vae = AutoencoderKL.from_pretrained(\n",
"    pretrained_model_name_or_path, subfolder=\"vae\", use_auth_token=hf_auth_token\n",
")\n",
"unet = UNet2DConditionModel.from_pretrained(\n",
"    pretrained_model_name_or_path, subfolder=\"unet\", use_auth_token=hf_auth_token\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "134c7b49-e8e8-4d10-9c96-018dd8123a2a",
"metadata": {},
"outputs": [],
"source": [
"# Resize the token embeddings because we added a new token to the tokenizer\n",
"text_encoder.resize_token_embeddings(len(tokenizer))\n",
"\n",
"# Initialise the placeholder token's embedding with the embedding of the initializer token\n",
"token_embeds = text_encoder.get_input_embeddings().weight.data\n",
"token_embeds[placeholder_token_id] = token_embeds[initializer_token_id]\n",
"\n",
"def freeze_params(params):\n",
"    for param in params:\n",
"        param.requires_grad = False\n",
"\n",
"# Freeze vae and unet\n",
"freeze_params(vae.parameters())\n",
"freeze_params(unet.parameters())\n",
"# Freeze all parameters except for the token embeddings in text encoder\n",
"params_to_freeze = itertools.chain(\n",
"    text_encoder.text_model.encoder.parameters(),\n",
"    text_encoder.text_model.final_layer_norm.parameters(),\n",
"    text_encoder.text_model.embeddings.position_embedding.parameters(),\n",
")\n",
"freeze_params(params_to_freeze)\n"
]
},
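{
"cell_type": "code",
"execution_count": null,
"id": "check-trainable-params-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (a minimal sketch, not part of the original notebook):\n",
"# after freezing, the only trainable weights left in the text encoder should be the\n",
"# token embeddings, so the two counts printed below should match.\n",
"trainable = sum(p.numel() for p in text_encoder.parameters() if p.requires_grad)\n",
"print(f\"text_encoder trainable params: {trainable:,}\")\n",
"print(f\"token embedding params:        {text_encoder.get_input_embeddings().weight.numel():,}\")"
]
},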
{
"cell_type": "code",
"execution_count": null,
"id": "5ceb1fff-25c3-4c9a-ad5d-45fc9ab8189f",
"metadata": {},
"outputs": [],
"source": [
"train_dataset = TextualInversionDataset(\n",
"    data_root=save_path,\n",
"    tokenizer=tokenizer,\n",
"    size=512,\n",
"    placeholder_token=placeholder_token,\n",
"    repeats=100,\n",
"    learnable_property=what_to_teach,  # option selected above (object or style)\n",
"    center_crop=False,\n",
"    set=\"train\",\n",
")\n",
"\n",
"def create_dataloader(train_batch_size=1):\n",
"    return torch.utils.data.DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)\n",
"\n",
"noise_scheduler = DDPMScheduler(\n",
"    beta_start=0.00085, beta_end=0.012, beta_schedule=\"scaled_linear\", num_train_timesteps=1000, tensor_format=\"pt\"\n",
")\n"
]
},
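{
"cell_type": "code",
"execution_count": null,
"id": "inspect-dataset-sample-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Optional sanity check (a minimal sketch, not part of the original notebook):\n",
"# pull one example from the dataset and confirm the tensor shape and the templated prompt.\n",
"sample = train_dataset[0]\n",
"print(\"pixel_values:\", tuple(sample[\"pixel_values\"].shape))  # expected: (3, 512, 512)\n",
"print(\"prompt:\", tokenizer.decode(sample[\"input_ids\"], skip_special_tokens=True))"
]
},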
{
"cell_type": "code",
"execution_count": null,
"id": "a511581b-a610-4f86-be49-a94529a456dc",
"metadata": {},
"outputs": [],
"source": [
"# If you are not happy with your results, you can tune the `learning_rate` and the `max_train_steps`\n",
"hyperparameters = {\n",
"    \"learning_rate\": 5e-04,\n",
"    \"scale_lr\": True,\n",
"    \"max_train_steps\": 8500,\n",
"    \"train_batch_size\": 1,\n",
"    \"gradient_accumulation_steps\": 4,\n",
"    \"seed\": 42,\n",
"    \"output_dir\": \"sd-concept-output\"\n",
"}"
]
},
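{
"cell_type": "code",
"execution_count": null,
"id": "effective-lr-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Optional worked example (a minimal sketch, not part of the original notebook):\n",
"# when \"scale_lr\" is True, the training function multiplies the base learning rate by\n",
"# gradient_accumulation_steps * train_batch_size * num_processes, so with the defaults\n",
"# above and a single process the optimizer actually runs at 5e-4 * 4 * 1 * 1 = 2e-3.\n",
"num_processes = 1  # assumption: notebook_launcher is called with num_processes=1 below\n",
"effective_lr = (\n",
"    hyperparameters[\"learning_rate\"]\n",
"    * hyperparameters[\"gradient_accumulation_steps\"]\n",
"    * hyperparameters[\"train_batch_size\"]\n",
"    * num_processes\n",
"    if hyperparameters[\"scale_lr\"]\n",
"    else hyperparameters[\"learning_rate\"]\n",
")\n",
"print(f\"Effective learning rate: {effective_lr}\")"
]
},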
{
"cell_type": "code",
"execution_count": null,
"id": "e5c9f8e1-c2da-4985-9f8c-fa5811d7ad20",
"metadata": {},
"outputs": [],
"source": [
"def training_function(text_encoder, vae, unet):\n",
"    logger = get_logger(__name__)\n",
"\n",
"    train_batch_size = hyperparameters[\"train_batch_size\"]\n",
"    gradient_accumulation_steps = hyperparameters[\"gradient_accumulation_steps\"]\n",
"    learning_rate = hyperparameters[\"learning_rate\"]\n",
"    max_train_steps = hyperparameters[\"max_train_steps\"]\n",
"    output_dir = hyperparameters[\"output_dir\"]\n",
"\n",
"    accelerator = Accelerator(\n",
"        gradient_accumulation_steps=gradient_accumulation_steps,\n",
"    )\n",
"\n",
"    # Seed the run for reproducibility (uses the \"seed\" hyperparameter defined above)\n",
"    set_seed(hyperparameters[\"seed\"])\n",
"\n",
"    train_dataloader = create_dataloader(train_batch_size)\n",
"\n",
"    if hyperparameters[\"scale_lr\"]:\n",
"        learning_rate = (\n",
"            learning_rate * gradient_accumulation_steps * train_batch_size * accelerator.num_processes\n",
"        )\n",
"\n",
"    # Initialize the optimizer\n",
"    optimizer = torch.optim.AdamW(\n",
"        text_encoder.get_input_embeddings().parameters(),  # only optimize the embeddings\n",
"        lr=learning_rate,\n",
"    )\n",
"\n",
"    text_encoder, optimizer, train_dataloader = accelerator.prepare(\n",
"        text_encoder, optimizer, train_dataloader\n",
"    )\n",
"\n",
"    # Move vae and unet to device\n",
"    vae.to(accelerator.device)\n",
"    unet.to(accelerator.device)\n",
"\n",
"    # Keep vae and unet in eval mode as we don't train them\n",
"    vae.eval()\n",
"    unet.eval()\n",
"\n",
"    # We need to recalculate our total training steps as the size of the training dataloader may have changed.\n",
"    num_update_steps_per_epoch = math.ceil(len(train_dataloader) / gradient_accumulation_steps)\n",
"    num_train_epochs = math.ceil(max_train_steps / num_update_steps_per_epoch)\n",
"\n",
"    # Train!\n",
"    total_batch_size = train_batch_size * accelerator.num_processes * gradient_accumulation_steps\n",
"\n",
"    logger.info(\"***** Running training *****\")\n",
"    logger.info(f\" Num examples = {len(train_dataset)}\")\n",
"    logger.info(f\" Instantaneous batch size per device = {train_batch_size}\")\n",
"    logger.info(f\" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}\")\n",
"    logger.info(f\" Gradient Accumulation steps = {gradient_accumulation_steps}\")\n",
"    logger.info(f\" Total optimization steps = {max_train_steps}\")\n",
"    # Only show the progress bar once on each machine.\n",
"    progress_bar = tqdm(range(max_train_steps), disable=not accelerator.is_local_main_process)\n",
"    progress_bar.set_description(\"Steps\")\n",
"    global_step = 0\n",
"\n",
"    for epoch in range(num_train_epochs):\n",
"        text_encoder.train()\n",
"        for step, batch in enumerate(train_dataloader):\n",
"            with accelerator.accumulate(text_encoder):\n",
"                # Convert images to latent space\n",
"                latents = vae.encode(batch[\"pixel_values\"]).latent_dist.sample().detach()\n",
"                latents = latents * 0.18215\n",
"\n",
"                # Sample noise that we'll add to the latents\n",
"                noise = torch.randn(latents.shape).to(latents.device)\n",
"                bsz = latents.shape[0]\n",
"                # Sample a random timestep for each image\n",
"                timesteps = torch.randint(0, noise_scheduler.num_train_timesteps, (bsz,), device=latents.device).long()\n",
"\n",
"                # Add noise to the latents according to the noise magnitude at each timestep\n",
"                # (this is the forward diffusion process)\n",
"                noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)\n",
"\n",
"                # Get the text embedding for conditioning\n",
"                encoder_hidden_states = text_encoder(batch[\"input_ids\"])[0]\n",
"\n",
"                # Predict the noise residual\n",
"                noise_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample\n",
"\n",
"                loss = F.mse_loss(noise_pred, noise, reduction=\"none\").mean([1, 2, 3]).mean()\n",
"                accelerator.backward(loss)\n",
"\n",
"                # Zero out the gradients for all token embeddings except the newly added\n",
"                # embeddings for the concept, as we only want to optimize the concept embeddings\n",
"                if accelerator.num_processes > 1:\n",
"                    grads = text_encoder.module.get_input_embeddings().weight.grad\n",
"                else:\n",
"                    grads = text_encoder.get_input_embeddings().weight.grad\n",
"                # Get the index for tokens that we want to zero the grads for\n",
"                index_grads_to_zero = torch.arange(len(tokenizer)) != placeholder_token_id\n",
"                grads.data[index_grads_to_zero, :] = grads.data[index_grads_to_zero, :].fill_(0)\n",
"\n",
"                optimizer.step()\n",
"                optimizer.zero_grad()\n",
"\n",
"            # Checks if the accelerator has performed an optimization step behind the scenes\n",
"            if accelerator.sync_gradients:\n",
"                progress_bar.update(1)\n",
"                global_step += 1\n",
"\n",
"            logs = {\"loss\": loss.detach().item()}\n",
"            progress_bar.set_postfix(**logs)\n",
"\n",
"            if global_step >= max_train_steps:\n",
"                break\n",
"\n",
"        accelerator.wait_for_everyone()\n",
"\n",
"    # Create the pipeline using the trained modules and save it.\n",
"    if accelerator.is_main_process:\n",
"        pipeline = StableDiffusionPipeline(\n",
"            text_encoder=accelerator.unwrap_model(text_encoder),\n",
"            vae=vae,\n",
"            unet=unet,\n",
"            tokenizer=tokenizer,\n",
"            scheduler=PNDMScheduler(\n",
"                beta_start=0.00085, beta_end=0.012, beta_schedule=\"scaled_linear\", skip_prk_steps=True\n",
"            ),\n",
"            safety_checker=StableDiffusionSafetyChecker.from_pretrained(\"CompVis/stable-diffusion-safety-checker\"),\n",
"            feature_extractor=CLIPFeatureExtractor.from_pretrained(\"openai/clip-vit-base-patch32\"),\n",
"        )\n",
"        pipeline.save_pretrained(output_dir)\n",
"        # Also save the newly trained embeddings\n",
"        learned_embeds = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[placeholder_token_id]\n",
"        learned_embeds_dict = {placeholder_token: learned_embeds.detach().cpu()}\n",
"        torch.save(learned_embeds_dict, os.path.join(output_dir, \"learned_embeds.bin\"))\n",
"\n",
"import accelerate\n",
"accelerate.notebook_launcher(training_function, args=(text_encoder, vae, unet), num_processes=1)"
]
},
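{
"cell_type": "code",
"execution_count": null,
"id": "run-inference-sketch",
"metadata": {},
"outputs": [],
"source": [
"# Optional follow-up (a minimal sketch, not part of the original notebook):\n",
"# load the pipeline that training_function saved to `output_dir` and try the new concept.\n",
"# Assumes training finished, a CUDA GPU is available, and the example prompt is arbitrary.\n",
"from diffusers import StableDiffusionPipeline\n",
"\n",
"pipe = StableDiffusionPipeline.from_pretrained(\n",
"    hyperparameters[\"output_dir\"],\n",
"    torch_dtype=torch.float16,\n",
").to(\"cuda\")\n",
"\n",
"prompt = f\"a painting of a lighthouse in the style of {placeholder_token}\"\n",
"image = pipe(prompt, num_inference_steps=50, guidance_scale=7.5).images[0]\n",
"image"
]
},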
{
"cell_type": "code",
"execution_count": null,
"id": "fda9c1eb-8a2d-4da9-a98c-8c6e717eb215",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}