EveryDream2trainer/Train_Colab.ipynb

{
  "cells": [
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "view-in-github"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/nawnie/EveryDream2trainer/blob/main/Train_Colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "blaLMSbkPHhG"
      },
      "source": [
        "<p align=\"center\">\n",
        "  <img src=\"https://github.com/victorchall/EveryDream2trainer/blob/562c4341137d1d9f5bf525e6c56fb4b1eefa2b57/doc/ed_logo_comp.jpg?raw=true\" width=\"600\" height=\"300\">\n",
        "</p>\n",
        "\n",
        "<br>\n",
        "\n",
        "---\n",
        "\n",
        "<div align=\"center\">\n",
        "  <font size=\"6\" color=\"yellow\">Colab Edition</font>\n",
        "</div>\n",
        "\n",
        "---\n",
        "\n",
        "<br>\n",
        "\n",
        "Check out the **EveryDream2trainer** documentation and runpod/vastai and local setups here: \n",
        "\n",
        "[📚 **Documentation**](https://github.com/victorchall/EveryDream2trainer#docs)\n",
        "\n",
        "And join our vibrant community on Discord:\n",
        "\n",
        "[💬 **Join the Discord**](https://discord.gg/uheqxU6sXN)\n",
        "\n",
        "If you find this tool useful, please consider subscribing to the project on Patreon or making a one-time donation on Ko-fi. Your donations keep this project alive as a free open-source tool with ongoing enhancements.\n",
        "\n",
        "<br>\n",
        "\n",
        "<p align=\"center\">\n",
        "  <a href=\"https://www.patreon.com/everydream\">\n",
        "    <img src=\"https://github.com/victorchall/EveryDream2trainer/raw/main/.github/patreon-medium-button.png?raw=true\" width=\"200\" height=\"50\">\n",
        "  </a>\n",
        "</p>\n",
        "\n",
        "<br>\n",
        "\n",
        "<p align=\"center\">\n",
        "  <a href=\"https://ko-fi.com/everydream\">\n",
        "    <img src=\"https://github.com/victorchall/EveryDream2trainer/raw/main/.github/kofibutton_sm.png?raw=true\" width=\"75\" height=\"75\">\n",
        "  </a>\n",
        "</p>\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "hAuBbtSvGpau"
      },
      "outputs": [],
      "source": [
        "#@markdown # Setup and Install Dependencies\n",
        "from IPython.display import clear_output, display, HTML\n",
        "import subprocess\n",
        "import time\n",
        "import os \n",
        "from tqdm.auto import tqdm\n",
        "import PIL\n",
        "\n",
        "# Helper that paints terminal text with a 24-bit ANSI colour\n",
        "def colored(r, g, b, text):\n",
        "    \"\"\"Return `text` wrapped in an RGB foreground escape, followed by a space and a switch back to white.\"\"\"\n",
        "    colour_on = \"\\033[38;2;{};{};{}m\".format(r, g, b)\n",
        "    white = \"\\033[38;2;255;255;255m\"\n",
        "    return colour_on + \"{} \".format(text) + white\n",
        "\n",
        "#@markdown Optional connect Gdrive  But strongly recommended\n",
        "#@markdown This will let you put all your training data and checkpoints directly on your drive.  \n",
        "#@markdown Much faster/easier to continue later, less setup time.\n",
        "\n",
        "#@markdown Creates /content/drive/MyDrive/everydreamlogs/ckpt\n",
        "# Colab form toggle; when True, Google Drive is mounted at /content/drive further down.\n",
        "Mount_to_Gdrive = True #@param{type:\"boolean\"} \n",
        "\n",
        "# Clone the git repository\n",
        "# NOTE(review): the clone lands in the current working directory; the %cd below assumes that is /content.\n",
        "print(colored(0, 255, 0, 'Cloning git repository...'))\n",
        "!git clone https://github.com/victorchall/EveryDream2trainer.git\n",
        "\n",
        "if Mount_to_Gdrive:\n",
        "  from google.colab import drive\n",
        "  drive.mount('/content/drive')\n",
        "\n",
        "  # Create the checkpoint folder on Drive up front (-p: no error if it already exists).\n",
        "  !mkdir -p /content/drive/MyDrive/everydreamlogs/ckpt\n",
        "\n",
        "# Everything after this point in the cell runs from the repository root.\n",
        "%cd /content/EveryDream2trainer\n",
        "\n",
        "# Download Arial font\n",
        "# Saved as arial.ttf in the repository root; presumably read by the trainer when labelling samples - verify.\n",
        "print(colored(0, 255, 0, 'Downloading Arial font...'))\n",
        "!wget -O arial.ttf https://raw.githubusercontent.com/matomo-org/travis-scripts/master/fonts/Arial.ttf\n",
        "\n",
        "# Requirement specifiers installed one by one by the loop below.\n",
        "# Every entry needs its own trailing comma: adjacent string literals are silently\n",
        "# concatenated by Python (previously 'tensorrt' 'wandb' became 'tensorrtwandb').\n",
        "packages = [\n",
        "    'transformers==4.29.2',\n",
        "    'diffusers[torch]==0.17.1',\n",
        "    'pynvml==11.4.1',\n",
        "    'bitsandbytes==0.38.1',\n",
        "    'ftfy==6.1.1',\n",
        "    'aiohttp==3.8.4',\n",
        "    'compel~=1.1.3',\n",
        "    'protobuf==3.20.1',\n",
        "    'wandb==0.15.3',\n",
        "    'pyre-extensions==0.0.29',\n",
        "    'xformers==0.0.20',\n",
        "    'pytorch-lightning==1.6.5',\n",
        "    'OmegaConf==2.2.3',\n",
        "    'tensorboard>=2.11.0',\n",
        "    'tensorrt',\n",
        "    # wandb is already pinned above, so the former unpinned duplicate entry is not repeated here.\n",
        "    'colorama',\n",
        "    'keyboard',\n",
        "    'lion-pytorch',\n",
        "]\n",
        "\n",
        "print(colored(0, 255, 0, 'Installing packages...'))\n",
        "import sys  # local import: pip is invoked through the interpreter that runs this kernel\n",
        "\n",
        "# (requirement, stderr text) for every install that returned a non-zero exit code\n",
        "failed_packages = []\n",
        "for package in tqdm(packages, desc='Installing packages', unit='package', bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt}'):\n",
        "    # Build an argument list and run it WITHOUT a shell: in a shell command line the '>' of a\n",
        "    # specifier such as 'tensorboard>=2.11.0' is an output redirection, not part of the requirement.\n",
        "    cmd = [sys.executable, '-m', 'pip', 'install', '-q']\n",
        "    if isinstance(package, tuple):\n",
        "        # Optional (requirement, extra index URL) form: force a reinstall from that index.\n",
        "        package_name, extra_index_url = package\n",
        "        cmd += ['-I', package_name, '--extra-index-url', extra_index_url]\n",
        "    else:\n",
        "        cmd.append(package)\n",
        "\n",
        "    # stdout stays hidden to keep the progress bar readable; stderr is kept so failures can be reported.\n",
        "    result = subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)\n",
        "    if result.returncode != 0:\n",
        "        failed_packages.append((package, result.stderr.strip()))\n",
        "\n",
        "clear_output()\n",
        "\n",
        "\n",
        "# Execute Python script\n",
        "print(colored(0, 255, 0, 'Executing Python script...'))\n",
        "!python utils/get_yamls.py\n",
        "clear_output()\n",
        "\n",
        "# Report after the last clear_output() so the summary stays visible.\n",
        "if failed_packages:\n",
        "    print(colored(255, 0, 0, \"Some packages failed to install:\"))\n",
        "    for failed_package, error_text in failed_packages:\n",
        "        last_line = error_text.splitlines()[-1] if error_text else \"unknown error\"\n",
        "        print(f\"  {failed_package}: {last_line}\")\n",
        "else:\n",
        "    print(colored(0, 255, 0, \"DONE! installing dependencies.\"))\n",
        "\n",
        "# Import pynvml and get GPU details\n",
        "# (imported here rather than at the top of the cell because it is installed by the loop above)\n",
        "import pynvml\n",
        "\n",
        "pynvml.nvmlInit()\n",
        "\n",
        "# Only the first GPU (index 0) is inspected.\n",
        "handle = pynvml.nvmlDeviceGetHandleByIndex(0)\n",
        "\n",
        "# NOTE(review): depending on the pynvml version the name may come back as bytes rather than str - confirm.\n",
        "gpu_name = pynvml.nvmlDeviceGetName(handle)\n",
        "# Total device memory converted from bytes to GiB.\n",
        "gpu_memory = pynvml.nvmlDeviceGetMemoryInfo(handle).total / 1024**3\n",
        "# The driver reports one integer; the arithmetic below decodes it as major * 1000 + minor * 10.\n",
        "cuda_version_number = pynvml.nvmlSystemGetCudaDriverVersion_v2()\n",
        "cuda_version_major = cuda_version_number // 1000\n",
        "cuda_version_minor = (cuda_version_number % 1000) // 10\n",
        "cuda_version = f\"{cuda_version_major}.{cuda_version_minor}\"\n",
        "\n",
        "pynvml.nvmlShutdown()\n",
        "\n",
        "# IPython shell capture: Python_version holds the output lines of `python --version`.\n",
        "Python_version = !python --version\n",
        "# These imports come after the install step earlier in this cell.\n",
        "import torch\n",
        "import torchvision\n",
        "import xformers\n",
        "\n",
        "# Render the collected environment facts as a small HTML table with yellow text.\n",
        "display(HTML(f\"\"\"\n",
        "<table style=\"background-color:transparent;\">\n",
        "  <tr>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">Python version:</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">{Python_version[0]}</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">GPU Name:</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">{gpu_name}</span></td>\n",
        "  </tr>\n",
        "  <tr>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">PyTorch version:</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">{torch.__version__}</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">GPU Memory (GB):</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">{gpu_memory:.2f}</span></td>\n",
        "  </tr>\n",
        "  <tr>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">Torchvision version:</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">{torchvision.__version__}</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">CUDA version:</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">{cuda_version}</span></td>\n",
        "  </tr>\n",
        "  <tr>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">XFormers version:</span></td>\n",
        "    <td style=\"background-color:transparent;\"><span style=\"color: #FFFF00;\">{xformers.__version__}</span></td>\n",
        "  </tr>\n",
        "</table>\n",
        "\"\"\"))\n",
        "\n",
        "time.sleep(2)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "unaffeqGP_0A"
      },
      "outputs": [],
      "source": [
        "#@title Get A Base Model\n",
        "#@markdown Choose SD1.5, Waifu Diffusion 1.3, SD2.1, or 2.1(512) from the dropdown, or paste your own URL in the box\n",
        "#@markdown * Alternately you can link to an HF repo using NAME/MODEL, this does not save to your Gdrive, if you want to save an HF model, use the direct URL\n",
        "\n",
        "#@markdown * Link to a set of diffusers on your Gdrive\n",
        "\n",
        "#@markdown * Paste a URL, atm there is no support for .safetensors\n",
        "\n",
        "from IPython.display import clear_output\n",
        "import os\n",
        "\n",
        "# Work from the repository root so relative paths (input/, utils/, the download) resolve there.\n",
        "%cd /content/EveryDream2trainer\n",
        "os.makedirs(\"input\", exist_ok=True)  # unlike a bare mkdir, this is silent when the cell is re-run\n",
        "MODEL_LOCATION = \"panopstor/EveryDream\" #@param [\"sd_v1-5+vae.ckpt\", \"hakurei/waifu-diffusion-v1-3\", \"stabilityai/stable-diffusion-2-1-base\", \"stabilityai/stable-diffusion-2-1\"] {allow-input: true}\n",
        "\n",
        "# The dropdown entry for SD1.5 is an alias for the hosted diffusers repo.\n",
        "if MODEL_LOCATION == \"sd_v1-5+vae.ckpt\":\n",
        "  MODEL_LOCATION = \"panopstor/EveryDream\"\n",
        "\n",
        "If_Ckpt = False\n",
        "download_path = \"\"\n",
        "\n",
        "# Anything that looks like a URL is downloaded into the repository root.\n",
        "if \".co\" in MODEL_LOCATION or \"https\" in MODEL_LOCATION or \"www\" in MODEL_LOCATION:\n",
        "    MODEL_URL = MODEL_LOCATION\n",
        "    print(\"Downloading...\")\n",
        "    # Quoted so characters such as '&' or '?' in the URL are not interpreted by the shell.\n",
        "    !wget \"$MODEL_LOCATION\"\n",
        "    clear_output()\n",
        "    print(\"Download completed!\")\n",
        "    download_path = os.path.join(os.getcwd(), os.path.basename(MODEL_URL))\n",
        "    # save_name must be defined on this branch as well; it is read further down.\n",
        "    save_name = os.path.basename(download_path)\n",
        "else:\n",
        "    save_name = MODEL_LOCATION\n",
        "\n",
        "inference_yaml = \" \"\n",
        "\n",
        "# Check if the downloaded or copied model is a .ckpt file\n",
        "#@markdown Is the model 1.5 or 2.1 based?\n",
        "model_type = \"SD1x\" #@param [\"SD1x\", \"SD2_512_base\", \"SD21\"]\n",
        "\n",
        "# The file handed to the converter: the download when there was one, otherwise the given path.\n",
        "checkpoint_path = download_path or MODEL_LOCATION\n",
        "\n",
        "if checkpoint_path.endswith(\".ckpt\"):\n",
        "    If_Ckpt = True\n",
        "    save_path = download_path\n",
        "    if \".ckpt\" in save_name:\n",
        "        save_name = save_name.replace(\".ckpt\", \"\")\n",
        "\n",
        "    # Decide the output folder BEFORE converting so that save_name always names the folder\n",
        "    # the diffusers model is really written to. The Drive folder is used only when it exists\n",
        "    # (the setup cell creates it when Drive is mounted); otherwise the model stays local.\n",
        "    gdrive_ckpt_dir = \"/content/drive/MyDrive/everydreamlogs/ckpt\"\n",
        "    if os.path.isdir(gdrive_ckpt_dir):\n",
        "        save_name = os.path.join(gdrive_ckpt_dir, save_name)\n",
        "\n",
        "    img_size = 512\n",
        "    # NOTE(review): upscale_attention is computed but the command below still passes a literal\n",
        "    # False to --upcast_attn; confirm the converter's argument handling before wiring it in.\n",
        "    upscale_attention = False\n",
        "    prediction_type = \"epsilon\"\n",
        "\n",
        "    if model_type == \"SD1x\":\n",
        "        inference_yaml = \"v1-inference.yaml\"\n",
        "    elif model_type == \"SD2_512_base\":\n",
        "        upscale_attention = True\n",
        "        inference_yaml = \"v2-inference.yaml\"\n",
        "    elif model_type == \"SD21\":\n",
        "        upscale_attention = True\n",
        "        prediction_type = \"v_prediction\"\n",
        "        inference_yaml = \"v2-inference-v.yaml\"\n",
        "        img_size = 768\n",
        "\n",
        "    !python utils/convert_original_stable_diffusion_to_diffusers.py --scheduler_type ddim \\\n",
        "    --original_config_file $inference_yaml \\\n",
        "    --image_size $img_size \\\n",
        "    --checkpoint_path \"$checkpoint_path\" \\\n",
        "    --prediction_type $prediction_type \\\n",
        "    --upcast_attn False \\\n",
        "    --dump_path \"$save_name\"\n",
        "\n",
        "if inference_yaml != \" \":\n",
        "    print(\"Model saved to: \" + save_name + \". The \" + inference_yaml + \" was used!\")\n",
        "\n",
        "print(\"Model \" + save_name + \" will be used!\")"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "EHyFzKWXX9kB"
      },
      "source": [
        "# Training\n",
        "\n",
        "For a more in-depth explanation of each of these parameters, check out /content/EveryDream2trainer/doc.\n",
        "\n",
        "After you've tried a few models, you will find /content/EveryDream2trainer/doc/ADVANCED_TWEAKING.md to be extremely helpful."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "j9pEI69WXS9w"
      },
      "outputs": [],
      "source": [
        "from google.colab import runtime\n",
        "from IPython.display import clear_output, display\n",
        "import time\n",
        "import sys\n",
        "import time\n",
        "import shutil\n",
        "import json\n",
        "import os\n",
        "\n",
        "#@title  #Run Everydream 2\n",
        "%cd /content/EveryDream2trainer\n",
        "#@markdown If you want to use a .json config or upload your own, skip this cell and run the cell below instead\n",
        "# Start from a clean extraction folder; -f keeps this quiet when the folder does not exist yet (first run).\n",
        "!rm -rf /content/EveryDream2trainer/Training_Data\n",
        "#@markdown * Save logs and output ckpts to Gdrive (strongly suggested, will need space)\n",
        "Save_to_Gdrive = True #@param{type:\"boolean\"}\n",
        "#@markdown * Save extra state information to help continue training (if you will resume later, requires high RAM shape)\n",
        "Save_optimizer_state = False #@param{type:\"boolean\"}\n",
        "#@markdown * Disconnect after training to save Credits \n",
        "Disconnect_after_training =  False #@param{type:\"boolean\"}\n",
        "#@markdown * Use resume to contnue training you just ran, will also find latest diffusers log in your Gdrive to continue.\n",
        "resume = False #@param{type:\"boolean\"}\n",
        "#@markdown * Name your project so you can find it in your logs\n",
        "Project_Name = \"My_Project\" #@param{type: 'string'}\n",
        "\n",
        "\n",
        "\n",
        "# Pick the optimizer config that matches the model family chosen in the \"Get A Base Model\" cell.\n",
        "if model_type == 'SD2_512_base' or model_type == 'SD21':\n",
        "    file_path = \"/content/EveryDream2trainer/optimizerSD21.json\"\n",
        "else:\n",
        "    file_path = \"/content/EveryDream2trainer/optimizer.json\"\n",
        "\n",
        "with open(file_path, 'r') as file:\n",
        "    data = json.load(file)\n",
        "\n",
        "\n",
        "#@markdown * The learning rate affects how much \"training\" is done on the model per training step. It is a very careful balance to select a value that will learn your data and not wreck the model. \n",
        "#@markdown Leave this default unless you are very comfortable with training and know what you are doing.\n",
        "Learning_Rate = 1e-6 #@param{type: 'number'}\n",
        "#@markdown * chosing this will allow you to ignore any settings specific to the text encode and will match it with the Unets settings, recommended for beginers.\n",
        "Match_text_to_Unet = False #@param{type:\"boolean\"}\n",
        "Text_lr =  5e-7 #@param {type:\"number\"}\n",
        "#@markdown * A learning rate scheduler can change your learning rate as training progresses.\n",
        "#@markdown * I recommend sticking with constant until you are comfortable with general training. \n",
        "Schedule = \"constant\" #@param [\"constant\", \"polynomial\", \"linear\", \"cosine\"] {allow-input: true}\n",
        "Text_lr_scheduler = \"cosine\" #@param [\"constant\", \"polynomial\", \"linear\", \"cosine\"] {allow-input: true}\n",
        "#@markdown * warm up steps are useful for validation and cosine lrs\n",
        "lr_warmup_steps = 0 #@param{type:\"integer\"}\n",
        "lr_decay_steps = 0 #@param {type:\"number\"}         \n",
        "Text_lr_warmup_steps = 0 #@param {type:\"number\"}\n",
        "Text_lr_decay_steps = 0 #@param {type:\"number\"}         \n",
        "\n",
        "# When matching is requested, EVERY text-encoder setting mirrors the U-Net one,\n",
        "# including the decay steps (previously left at their own value).\n",
        "if Match_text_to_Unet:\n",
        "  Text_lr = Learning_Rate\n",
        "  Text_lr_scheduler = Schedule\n",
        "  Text_lr_warmup_steps = lr_warmup_steps\n",
        "  Text_lr_decay_steps = lr_decay_steps\n",
        "\n",
        "# 'base' holds the U-Net settings, 'text_encoder_overrides' the text-encoder ones.\n",
        "data['base']['lr'] = Learning_Rate\n",
        "data['text_encoder_overrides']['lr'] = Text_lr \n",
        "data['base']['lr_scheduler'] = Schedule\n",
        "data['text_encoder_overrides']['lr_scheduler'] = Text_lr_scheduler\n",
        "data['base']['lr_warmup_steps'] = lr_warmup_steps\n",
        "data['base']['lr_decay_steps'] = lr_decay_steps\n",
        "data['text_encoder_overrides']['lr_warmup_steps'] = Text_lr_warmup_steps\n",
        "data['text_encoder_overrides']['lr_decay_steps'] = Text_lr_decay_steps\n",
        "\n",
        "# Save the updated JSON data back to the file\n",
        "with open(file_path, 'w') as file:\n",
        "    json.dump(data, file, indent=4)\n",
        "\n",
        "#@markdown * Resolution to train at (recommend 512).  Higher resolution will require lower batch size (below).\n",
        "Resolution = 512 #@param {type:\"slider\", min:256, max:768, step:64}\n",
        "\n",
        "#@markdown * Batch size is also another \"hyperparameter\" of itself and there are tradeoffs. It may not always be best to use the highest batch size possible. Once of the primary reasons to change it is if you get \"CUDA out of memory\" errors where lowering the value may help.\n",
        "\n",
        "#@markdown * Batch size impacts VRAM use.  8 should work on SD1.x models and 5 for SD2.x models at 512 resolution.  Lower this if you get CUDA out of memory errors. You can check resources on your instance and watch the GPU RAM.\n",
        "\n",
        "Batch_Size = 6 #@param{type: 'number'}\n",
        "\n",
        "#@markdown * Gradient accumulation is sort of like a virtual batch size increase use this to increase batch size with out increasing vram usage\n",
        "#@markdown Increasing from 1 to 2 will have a minor impact on vram use, but more beyond that will not.\n",
        "#@markdown In colab free teir you can expect the fastest proformance from a batch of 8 and a gradient step of 1\n",
        "#@markdown This is mostly for use if you are training higher resolution on free tier and cannot increase batch size.\n",
        "\n",
        "Gradient_steps = 1 #@param{type:\"slider\", min:1, max:10, step:1}\n",
        "\n",
        "#@markdown * Location on your Gdrive where your training images are.\n",
        "Dataset_Location = \"/content/drive/MyDrive/training_samples\" #@param {type:\"string\"}\n",
        "\n",
        "# save_name is produced by the \"Get A Base Model\" cell, which therefore has to be run first.\n",
        "# When resuming, `model` is assigned further down in this cell instead.\n",
        "if not resume:\n",
        "  model = save_name\n",
        "\n",
        "#@markdown * Max Epochs to train for, this defines how many total times all your training data is used. Default of 100 is a good start if you are training ~30-40 images of one subject. If you have 100 images, you can reduce this to 40-50 and so forth.\n",
        "\n",
        "Max_Epochs = 100 #@param {type:\"slider\", min:0, max:200, step:1}\n",
        "\n",
        "#@markdown * How often to save checkpoints.\n",
        "Save_every_N_epoch = 20 #@param{type:\"integer\"}\n",
        "\n",
        "#@markdown * Test sample generation steps, how often to generate samples during training.\n",
        "\n",
        "#@markdown You can set your own sample prompts by adding them, one line at a time, to `/content/EveryDream2trainer/sample_prompts.txt`.  If left empty, it will use the captions from your training images.\n",
        "\n",
        "#@markdown Use the steps_between_samples to set how often the samples are generated.\n",
        "Steps_between_samples = 300 #@param{type:\"integer\"}\n",
        "\n",
        "#@markdown * That's it!  Run the cell! or configure these advance options\n",
        "\n",
        "#@markdown # ________________ ADV SETTINGS _________________\n",
        "#@markdown These are the default Every Dream 2 settings, changing these without learning what they do will likley waste compute credits\n",
        "#@markdown please read the doc folder before changing these!\n",
        "\n",
        "#@markdown * A tip using the sliders, to finely adjust these click them with your mouse then use your keyboard arrows\n",
        "\n",
        "#@markdown * Using the same seed each time you train allows for more accurate a/b comparison of models, leave at -1 for random\n",
        "#@markdown * The seed also effects your training samples, if you want the same seed each sample you will need to change it from -1\n",
        "Training_Seed = -1 #@param{type:\"integer\"}\n",
        "\n",
        "#@markdown * use this option to configure a sample_prompts.json\n",
        "#@markdown * check out /content/EveryDream2trainer/doc/logging.md. for more details\n",
        "Advance_Samples = False #@param{type:\"boolean\"}\n",
        "# Plain-text prompts by default; the JSON variant is used only when Advance_Samples is ticked.\n",
        "Sample_File = \"sample_prompts.txt\"\n",
        "if Advance_Samples:\n",
        "  Sample_File = \"sample_prompts.json\"\n",
        "#@markdown * Checkpointing Saves Vram to allow larger batch sizes minor slow down on a single batch size but will can allow room for a higher traning resolution (suggested on Colab Free tier, turn off for A100)\n",
        "Gradient_checkpointing = True #@param{type:\"boolean\"}\n",
        "Disable_Xformers = False #@param{type:\"boolean\"}\n",
        "#@markdown * Tag shuffling, mainly for booru training. Best to just read this if interested in shufflng tags /content/EveryDream2trainer/doc/SHUFFLING_TAGS.md\n",
        "shuffle_tags = False #@param{type:\"boolean\"}\n",
        "#@markdown * You can turn off the text encoder training (generally not suggested)\n",
        "Disable_text_Encoder= False #@param{type:\"boolean\"}\n",
        "#@markdown * Skip the nth last layer of CLIP.\n",
        "Clip_skip = 1 #@param {type:\"slider\", min:0, max:4, step:1}\n",
        "#@markdown * ratio of captions dropped from training data.\n",
        "Conditional_DropOut = 0.04 #@param {type:\"slider\", min:0, max:0.3, step:0.01}\n",
        "#@markdown * Ratio of images randomly to flip horizontally.\n",
        "Picture_flip = 0 #@param {type:\"slider\", min:0, max:0.5, step:0.05}\n",
        "#@markdown * This can improve contrast in light and dark scenes, Use a ratio between 0-10% for Best results.\n",
        "zero_frequency_noise = 0.01 #@param {type:\"slider\", min:0, max:0.12, step:0.01}\n",
        "\n",
        "#@markdown * Weights and Biases logging token. \n",
        "# #@markdown Paste your token here if you have an account so you can use it to track your training progress.  If you don't have an account, you can create one for free at https://wandb.ai/site.  Log will use your project name from above. This is a free online logging utility.\n",
        "# #@markdown Your key is on this page: https://wandb.ai/settings under \"Danger Zone\" \"API Keys\"\n",
        "# An empty token leaves wandb logging off (see the flag assembly further down).\n",
        "wandb_token = '' #@param{type:\"string\"}\n",
        "\n",
        "\n",
        "#@markdown use validation with wandb\n",
        "\n",
        "# NOTE(review): the misspelt name `validatation` is read again further down; rename both together if at all.\n",
        "validatation = False #@param{type:\"boolean\"}\n",
        "Hide_Warnings = False #@param {type:\"boolean\"}\n",
        "\n",
        "# Archive suffixes that mark Dataset_Location as a compressed dataset rather than a folder.\n",
        "extensions = ['.zip', '.7z', '.rar', '.tgz']\n",
        "uncompressed_dir = 'Training_Data'\n",
        "\n",
        "# Suffix test (not substring): a folder that merely contains '.zip' in its path stays a folder.\n",
        "if Dataset_Location.endswith(tuple(extensions)):\n",
        "    # shutil can only unpack the formats it has registered, which by default do not\n",
        "    # include .7z or .rar; fail with an actionable message instead of an obscure one.\n",
        "    unpackable_suffixes = tuple(\n",
        "        suffix\n",
        "        for _name, suffixes, _description in shutil.get_unpack_formats()\n",
        "        for suffix in suffixes\n",
        "    )\n",
        "    if not Dataset_Location.endswith(unpackable_suffixes):\n",
        "        raise shutil.ReadError(\n",
        "            f\"Cannot extract '{Dataset_Location}': only {', '.join(unpackable_suffixes)} archives \"\n",
        "            \"can be unpacked here. Re-pack the dataset as .zip or .tgz, or point Dataset_Location at a folder.\"\n",
        "        )\n",
        "\n",
        "    # Create the uncompressed directory if it doesn't exist\n",
        "    os.makedirs(uncompressed_dir, exist_ok=True)\n",
        "\n",
        "    # Extract the compressed file to the uncompressed directory\n",
        "    shutil.unpack_archive(Dataset_Location, uncompressed_dir)\n",
        "\n",
        "    # Set the dataset location to the uncompressed directory\n",
        "    Dataset_Location = uncompressed_dir\n",
        "\n",
        "# Use the dataset location in the rest of your code\n",
        "dataset = Dataset_Location\n",
        "\n",
        "\n",
        "# Each optional feature becomes either an empty string or a ready-made CLI fragment\n",
        "# that is spliced into the train.py command line below.\n",
        "Drive=\"\"\n",
        "if Save_to_Gdrive:\n",
        "  Drive = \"--logdir /content/drive/MyDrive/everydreamlogs --save_ckpt_dir /content/drive/MyDrive/everydreamlogs/ckpt\"\n",
        "\n",
        "# The slider allows 0; train for at least one epoch.\n",
        "# (Previously this assigned to a misspelt name, Max_Epoch, so the guard had no effect.)\n",
        "if Max_Epochs==0:\n",
        "  Max_Epochs=1\n",
        "\n",
        "if resume:\n",
        "  model = \"findlast\"\n",
        "\n",
        "validate = \"\"\n",
        "if validatation:\n",
        "  validate = \"--validation_config validation_default.json\"\n",
        "\n",
        "\n",
        "if Hide_Warnings:\n",
        "  import warnings\n",
        "  warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "wandb_settings = \"\"\n",
        "if wandb_token:\n",
        "  # Drop any stored credentials before logging in; -f avoids an error when none exist yet.\n",
        "  !rm -f /root/.netrc\n",
        "  !wandb login $wandb_token\n",
        "  wandb_settings = \"--wandb\"\n",
        "\n",
        "Gradient = \"\"\n",
        "if Gradient_checkpointing:\n",
        "  Gradient = \"--gradient_checkpointing \"\n",
        "\n",
        "DX = \"\"  \n",
        "if Disable_Xformers:\n",
        "  DX = \"--disable_xformers \"\n",
        "\n",
        "shuffle = \"\"\n",
        "if shuffle_tags:\n",
        "  shuffle = \"--shuffle_tags \"\n",
        "\n",
        "Save_optimizer = \"\"\n",
        "if Save_optimizer_state:\n",
        "  Save_optimizer = \"--save_optimizer\"\n",
        "\n",
        "textencode = \"\"\n",
        "if Disable_text_Encoder:\n",
        "  textencode = \"--disable_textenc_training\"\n",
        "\n",
        "\n",
        "# Launch the trainer inside this kernel. The $name tokens are filled in by IPython from the\n",
        "# variables defined above; optional flags that were left as empty strings simply vanish.\n",
        "%run train.py --resume_ckpt \"$model\" \\\n",
        "  $textencode \\\n",
        "  $Gradient \\\n",
        "  $shuffle \\\n",
        "  $Drive \\\n",
        "  $DX \\\n",
        "  $validate \\\n",
        "  $wandb_settings \\\n",
        "  $Save_optimizer \\\n",
        "  --clip_skip $Clip_skip \\\n",
        "  --batch_size $Batch_Size \\\n",
        "  --grad_accum $Gradient_steps \\\n",
        "  --cond_dropout $Conditional_DropOut \\\n",
        "  --data_root \"$dataset\" \\\n",
        "  --flip_p $Picture_flip \\\n",
        "  --log_step 25 \\\n",
        "  --max_epochs $Max_Epochs \\\n",
        "  --project_name \"$Project_Name\" \\\n",
        "  --resolution $Resolution \\\n",
        "  --sample_prompts \"$Sample_File\" \\\n",
        "  --sample_steps $Steps_between_samples \\\n",
        "  --save_every_n_epoch $Save_every_N_epoch \\\n",
        "  --seed $Training_Seed \\\n",
        "  --zero_frequency_noise_ratio $zero_frequency_noise\n",
        "\n",
        "# Finish the training process\n",
        "# clear_output()\n",
        "time.sleep(2)\n",
        "print(\"Training is complete, select a model to start training again\")\n",
        "time.sleep(2)\n",
        "\n",
        "if Disconnect_after_training:\n",
        "  # One value drives both the announcement and the wait, so they cannot disagree\n",
        "  # (the message used to say 30 seconds while the code slept for 40).\n",
        "  disconnect_delay_seconds = 40\n",
        "  print(f\"Training is complete, in {disconnect_delay_seconds} seconds the instance will reset, you will need to choose a model and you can resume training again\")\n",
        "  time.sleep(disconnect_delay_seconds)\n",
        "  runtime.unassign()\n",
        "\n",
        "# Signal 9 (SIGKILL) terminates this kernel process unconditionally; every variable is lost,\n",
        "# which is why a model has to be selected again before the next run.\n",
        "# NOTE(review): presumably done to release GPU memory held by the trainer - confirm.\n",
        "os.kill(os.getpid(), 9)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "8HmIWtODuE6p"
      },
      "outputs": [],
      "source": [
        "#@title Test your Diffusers\n",
        "import torch\n",
        "from diffusers import StableDiffusionPipeline, DiffusionPipeline, DPMSolverMultistepScheduler\n",
        "import torch\n",
        "#from xformers.ops import MemoryEfficientAttentionFlashAttentionOp\n",
        "\n",
        "#@markdown Path to the diffusers that was trained\n",
        "\n",
        "#@markdown You can look in the file drawer on the left /content/drive/MyDrive/everydreamlogs and click the three dots to copy the path\n",
        "\n",
        "#@markdown ex. /content/drive/MyDrive/everydreamlogs/my_project_20230126-023804/ckpts/interrupted-gs86\n",
        "\n",
        "sd2 = True #@param {type:\"boolean\"}\n",
        "diffusers_path=\"stabilityai/stable-diffusion-2-1-base\" #@param{type:\"string\"}\n",
        "\n",
        "PROMPT= \"a photo of an astronaut on the moon\" #@param{type:\"string\"}\n",
        "number_of_inference_steps= 35 #@param{type:\"integer\"}\n",
        "sample_name = \"test\" #@param{type:\"string\"}\n",
        "n_propmt = \"bad, deformed, ugly, bad anotomy\"\n",
        "\n",
        "#pipe.load_textual_inversion(\"sd-concepts-library/cat-toy\")\n",
        "#pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)\n",
        "\n",
        "# Load the model the form points at. Both branches read diffusers_path: previously the sd2\n",
        "# branch loaded a hard-coded hub repo, so a freshly trained SD2 model was never the one tested,\n",
        "# and a leftover demo pipeline was downloaded and run before the form values were even used.\n",
        "# No hub `revision` is requested because a locally trained diffusers folder has none.\n",
        "if sd2:\n",
        "  pipe = DiffusionPipeline.from_pretrained(diffusers_path, torch_dtype=torch.float16)\n",
        "else:\n",
        "  pipe = StableDiffusionPipeline.from_pretrained(diffusers_path, torch_dtype=torch.float16)\n",
        "\n",
        "pipe = pipe.to(\"cuda\")\n",
        "\n",
        "# Generate one image; the negative prompt steers the sampler away from the listed traits.\n",
        "image = pipe(PROMPT, num_inference_steps=number_of_inference_steps, negative_prompt=n_propmt).images[0]\n",
        "\n",
        "# Written to the current working directory as <sample_name>.png\n",
        "image.save(sample_name + \".png\")"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "fzXLJVC6OCeP"
      },
      "source": [
        "## Optional NoteBook Features, read all the documentation in /content/EveryDream2trainer/doc before proceeding."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "colab": {
          "background_save": true
        },
        "id": "BafdWaYymg0O"
      },
      "outputs": [],
      "source": [
        "#@title Remove logs for samples when training (optional) run before training\n",
        "# Trainer source file that is patched in place.\n",
        "file_path = \"/content/EveryDream2trainer/utils/sample_generator.py\"\n",
        "\n",
        "# The code block to remove (must match the trainer source character for character,\n",
        "# including its leading indentation)\n",
        "code_to_remove = '''                    with open(f\"{self.log_folder}/samples/gs{global_step:05}-{sample_index}-{clean_prompt[:100]}.txt\", \"w\", encoding='utf-8') as f:\n",
        "                        f.write(str(batch[prompt_idx]))'''\n",
        "\n",
        "# Read the whole file as a single string\n",
        "with open(file_path, \"r\", encoding=\"utf-8\") as file:\n",
        "    content = file.read()\n",
        "\n",
        "# Rewrite the file only when the snippet is actually present, and report what really happened\n",
        "# (a second run, or a changed trainer source, means there is nothing to remove).\n",
        "if code_to_remove in content:\n",
        "    with open(file_path, \"w\", encoding=\"utf-8\") as file:\n",
        "        file.write(content.replace(code_to_remove, ''))\n",
        "    print(\"The specified code block has been deleted.\")\n",
        "else:\n",
        "    print(\"The specified code block was not found (already removed, or the trainer source changed). No changes were made.\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "Iuoa_1B9jRGU"
      },
      "outputs": [],
      "source": [
        "#@title Alternate startup script\n",
        "#@markdown * Edit train.json or chain0.json to setup your paramaters\n",
        "\n",
        "#@markdown * Edit using a chain length of 0 will use train.json\n",
        "\n",
        "#@markdown * Edit the above cell saving it as chain1.json to make use of chaining\n",
        "\n",
        "#@markdown * make sure to check each confguration you will need 1 Json per chain length 3 are provided\n",
        "\n",
        "#@markdown * your locations in the .json can be done in this format /content/drive/MyDrive/   - then the sub folder you wish to use\n",
        "\n",
        "%cd /content/EveryDream2trainer\n",
        "Chain_Length=0 #@param{type:\"integer\"}\n",
        "\n",
        "if not Chain_Length:\n",
        "  # No chain requested (0 or empty): run once with train.json.\n",
        "  # The leading `!python` is required; a bare `train.py --config ...` is not valid Python.\n",
        "  !python train.py --config train.json\n",
        "else:\n",
        "  # Run chain0.json, chain1.json, ... one after another; a negative length runs nothing.\n",
        "  # (The old counter used `I =+ 1`, which assigns +1 instead of incrementing.)\n",
        "  for chain_index in range(Chain_Length):\n",
        "    !python train.py --config chain{chain_index}.json"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "ls6mX94trxZV"
      },
      "source": [
        "Need tools to manage your large datasets? Check out https://github.com/victorchall/EveryDream for some useful tools and a captioner."
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "include_colab_link": true,
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.10.5"
    },
    "vscode": {
      "interpreter": {
        "hash": "e602395b73d27e246c3f66de86a1ed4dc1e5a85e8356fd1a2f027b9d2f1f8162"
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}