diff --git a/Train_Runpod.ipynb b/Train_Runpod.ipynb index 840245b..705817a 100644 --- a/Train_Runpod.ipynb +++ b/Train_Runpod.ipynb @@ -30,22 +30,13 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "0902e735", + "cell_type": "markdown", + "id": "5123d4e6-281c-4475-99fd-328f4d5df734", "metadata": {}, - "outputs": [], "source": [ - "# When running on a pod designed for Automatic 1111 \n", - "# we need to kill the webui process to free up mem for training\n", - "!ps x | grep -E \"(relauncher|webui)\" | grep -v \"grep\" | awk '{print $1}' | xargs kill $1\n", + "# For best results, restart the pod after the next cell completes\n", "\n", - "# check system resources, make sure your GPU actually has 24GB\n", - "# You should see something like \"0 MB / 24576 MB\" in the middle of the printout\n", - "# if you see 0 MB / 22000 MB pick a beefier instance...\n", - "!grep Swap /proc/meminfo\n", - "!swapon -s\n", - "!nvidia-smi" + "Here we ensure that EveryDream2trainer is installed, and we disable the Automatic 1111 web-ui. But the VRAM consumed by the web-ui will not be fully freed until the pod restarts. This is especially important if you are training with large batch sizes."
] }, { @@ -58,13 +49,34 @@ "import os\n", "\n", "%cd /workspace\n", + "!echo pass > /workspace/stable-diffusion-webui/relauncher.py\n", "if not os.path.exists(\"EveryDream2trainer\"):\n", " !git clone https://github.com/victorchall/EveryDream2trainer\n", "\n", "%cd EveryDream2trainer\n", + "%mkdir input\n", "!python utils/get_yamls.py" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "0902e735", + "metadata": {}, + "outputs": [], + "source": [ + "# When running on a pod designed for Automatic 1111 \n", + "# we need to kill the webui process to free up mem for training\n", + "!ps x | grep -E \"(relauncher|webui)\" | awk '{print $1}' | xargs kill $1\n", + "\n", + "# check system resources, make sure your GPU actually has 24GB\n", + "# You should see something like \"0 MB / 24576 MB\" in the middle of the printout\n", + "# if you see 0 MB / 22000 MB pick a beefier instance...\n", + "!grep Swap /proc/meminfo\n", + "!swapon -s\n", + "!nvidia-smi" + ] + }, { "cell_type": "markdown", "id": "0bf1e8cd", @@ -102,9 +114,9 @@ "!python -m pip install --upgrade pip\n", "\n", "!pip install requests==2.25.1\n", - "!pip install -U -I torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url \"https://download.pytorch.org/whl/cu116\"\n", + "!pip install -U -I torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url \"https://download.pytorch.org/whl/cu117\"\n", "!pip install transformers==4.25.1\n", - "!pip install -U diffusers[torch]==0.10.2\n", + "!pip install -U diffusers[torch]\n", "\n", "!pip install pynvml==11.4.1\n", "!pip install bitsandbytes==0.35.0\n", @@ -115,20 +127,8 @@ "!pip install wandb==0.13.6\n", "!pip install colorama==0.4.6\n", "!pip install -U triton\n", - "!pip install -U ninja\n", - "\n", - "from subprocess import getoutput\n", - "s = getoutput('nvidia-smi')\n", - "\n", - "if \"A100\" in s:\n", - " print(\"Detected A100, installing stable xformers\")\n", - " !pip install -U xformers\n", - "else:\n", - " # A5000/3090/4090 support 
requires us to build xformers ourselves for now\n", - " print(\"Building xformers for SM86\")\n", - " !apt-get update && apt-get install -y gcc g++\n", - " !export TORCH_CUDA_ARCH_LIST=8.6 && pip install git+https://github.com/facebookresearch/xformers.git@48a77cc#egg=xformers\n", - "\n", + "!pip install --pre -U xformers\n", + " \n", "print(\"DONE\")" ] }, @@ -184,6 +184,7 @@ "metadata": {}, "outputs": [], "source": [ + "%cd /workspace/EveryDream2trainer\n", "repo=\"panopstor/EveryDream\"\n", "ckpt_file=\"sd_v1-5_vae.ckpt\"\n", "\n", @@ -230,16 +231,33 @@ "metadata": {}, "outputs": [], "source": [ - "!python train.py --project_name \"ft_v1a_512_15e7\" \\\n", - "--resume_ckpt \"{ckpt_name}\" \\\n", + "%cd /workspace/EveryDream2trainer\n", + "!python train.py --project_name \"sd1_mymodel_000\" \\\n", + "--resume_ckpt \"sd_v1-5_vae\" \\\n", + "--data_root \"input\" \\\n", + "--resolution 512 \\\n", + "--batch_size 8 \\\n", + "--max_epochs 100 \\\n", + "--save_every_n_epochs 50 \\\n", + "--lr 1.8e-6 \\\n", + "--lr_scheduler cosine \\\n", + "--sample_steps 250 \\\n", + "--useadam8bit \\\n", + "--save_full_precision \\\n", + "--shuffle_tags \\\n", + "--amp \\\n", + "--write_schedule\n", + "\n", + "!python train.py --project_name \"sd1_mymodel_100\" \\\n", + "--resume_ckpt \"findlast\" \\\n", "--data_root \"input\" \\\n", "--resolution 512 \\\n", "--batch_size 4 \\\n", - "--max_epochs 200 \\\n", - "--save_every_n_epochs 25 \\\n", - "--lr 1.5e-6 \\\n", + "--max_epochs 100 \\\n", + "--save_every_n_epochs 20 \\\n", + "--lr 1.0e-6 \\\n", "--lr_scheduler constant \\\n", - "--sample_steps 50 \\\n", + "--sample_steps 150 \\\n", "--useadam8bit \\\n", "--save_full_precision \\\n", "--shuffle_tags \\\n", @@ -247,38 +265,6 @@ "--write_schedule" ] }, - { - "cell_type": "markdown", - "id": "ca170883-2d18-4f34-adbb-aca5824a351b", - "metadata": {}, - "source": [ - "## You can chain togther a tuning schedule using `--resume_ckpt findlast`" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "f39291ef-bea9-471e-bbc7-7b681fa87eb3", - "metadata": {}, - "outputs": [], - "source": [ - "!python train.py --project_name \"ft_v1b_512_07e7\" \\\n", - "--resume_ckpt findlast \\\n", - "--data_root \"input\" \\\n", - "--resolution 512 \\\n", - "--batch_size 4 \\\n", - "--max_epochs 50 \\\n", - "--save_every_n_epochs 25 \\\n", - "--lr 0.7e-6 \\\n", - "--lr_scheduler constant \\\n", - "--sample_steps 50 \\\n", - "--useadam8bit \\\n", - "--save_full_precision \\\n", - "--shuffle_tags \\\n", - "--amp \\\n", - "--write_schedule\n" - ] - }, { "cell_type": "markdown", "id": "f24eee3d-f5df-45f3-9acc-ee0206cfe6b1", @@ -320,8 +306,8 @@ "!huggingface-cli lfs-enable-largefiles\n", "# fill in these three fields:\n", "hfusername = \"MyHfUser\"\n", - "reponame = \"MyRepo\"\n", - "ckpt_name = \"ft_v1b_512_15e7-ep200-gs02500.ckpt\"\n", + "reponame = \"MyHfRepo\"\n", + "ckpt_name = \"sd1_mymodel_000-ep100-gs01000.ckpt\"\n", "\n", "\n", "target_name = ckpt_name.replace('-','').replace('=','')\n", @@ -348,7 +334,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -362,7 +348,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.6.10" }, "vscode": { "interpreter": {