Streamline Vast/Runpod docker

This removes runtime installation steps to make vast.ai work out of the box and fixes some issues with dev containers
2023-02-27 00:20:27 +01:00 · 2023-02-27 00:20:27 +01:00 · deb83f42cb
parent 329ba2bfc1
commit deb83f42cb
15 changed files with 76 additions and 327 deletions
--- a/.devcontainer/gpu/devcontainer.json
+++ b/.devcontainer/gpu/devcontainer.json
@ -0,0 +1,24 @@
+{
+    "name": "EveryDream2 Dev Container",
+	// This Dockerfile requires buildx to be enabled by default
+	// `docker buildx install`
+	"build": {
+		"dockerfile": "../../docker/Dockerfile",
+		"target": "runtime"
+	},
+	"postStartCommand": "/start.sh",
+	"forwardPorts": [8888, 6006],
+	"containerEnv": {
+		"JUPYTER_PASSWORD": "EveryDream"
+	},
+	
+	// I don't have a GPU to test this with
+	"runArgs": [
+		"--gpus",
+		"all"
+	],
+
+	// Mimic RunPod/Vast setup
+	"workspaceMount": "source=${localWorkspaceFolder},target=/workspace/EveryDream2trainer,type=bind",
+	"workspaceFolder": "/workspace/EveryDream2trainer"
+}
--- a/.devcontainer/no_gpu/devcontainer.json
+++ b/.devcontainer/no_gpu/devcontainer.json
@ -1,12 +1,19 @@
 {
    "name": "EveryDream2 Dev Container",
-	"dockerFile": "../docker/Dockerfile",
-	"postStartCommand": "/start.sh",
-
-	"containerEnv": {
-		"LOCAL_DEV": "1"
+	// This Dockerfile requires buildx to be enabled by default
+	// `docker buildx install`
+	"build": {
+		"dockerfile": "../../docker/Dockerfile",
+		"target": "runtime"
 	},

+	"postStartCommand": "/start.sh",
+	"forwardPorts": [8888, 6006],
+	
+	"containerEnv": {
+		"JUPYTER_PASSWORD": "EveryDream"
+	},
+	
 	// Mimic RunPod/Vast setup
 	"workspaceMount": "source=${localWorkspaceFolder},target=/workspace/EveryDream2trainer,type=bind",
 	"workspaceFolder": "/workspace/EveryDream2trainer"
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@ -24,7 +24,7 @@ jobs:
      id: build-and-push
      uses: docker/build-push-action@v4
      with:
-        context: .
+        context: ./docker
        push: false
        file: docker/Dockerfile
        cache-from: type=gha
--- a/.github/workflows/docker-publish.yml
+++ b/.github/workflows/docker-publish.yml
@ -64,7 +64,7 @@ jobs:
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
-          context: .
+          context: ./docker
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
--- a/.gitignore
+++ b/.gitignore
@ -12,3 +12,4 @@
 /mycfgs/**
 /.vscode/**
 .ssh_config
+*inference*.yaml
--- a/Train_JupyterLab.ipynb
+++ b/Train_JupyterLab.ipynb
@ -49,39 +49,6 @@
    "You can set your own sample prompts by adding them, one line at a time, to sample_prompts.txt.### (Optional) Weights and Biases login. "
   ]
  },
-  {
-   "cell_type": "markdown",
-   "id": "82990f5c-0b55-4db1-a0d5-9d1fcab45ce8",
-   "metadata": {},
-   "source": [
-    "### (Optional) Weights and Biases login. \n",
-    "Paste your token here if you have an account so you can use it to track your training progress.  If you don't have an account, you can create one for free at https://wandb.ai/site.  Log will use your project name from above. This is a free online logging utility.\n",
-    "\n",
-    "Your key is on this page: https://wandb.ai/settings under \"Danger Zone\" \"API Keys\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cdbaf48c-f1e2-458d-b1ee-707f3b71bf61",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ipywidgets import *\n",
-    "\n",
-    "wandb_token = Password(placeholder=\"Optional Weights & Biases auth token\")\n",
-    "out = Output()\n",
-    "def wandb_login(_):\n",
-    "    with out:\n",
-    "        if wandb_token.value:\n",
-    "          !wandb login {wandb_token.value}\n",
-    "      \n",
-    "wandb_btn = Button(description=\"W&B Login\")\n",
-    "wandb_btn.on_click(wandb_login)\n",
-    "print()\n",
-    "display(VBox([wandb_token, wandb_btn, out]))"
-   ]
-  },
  {
   "cell_type": "markdown",
   "id": "3d9b0db8-c2b1-4f0a-b835-b6b2ef527019",
@ -158,7 +125,10 @@
    "\n",
    "Next cell runs training. This will take a while depending on your number of images, repeats, and max_epochs.\n",
    "\n",
-    "You can watch for test images in the logs folder."
+    "You can watch for test images in the logs folder.\n",
+    "\n",
+    "## Weights and Biases\n",
+    "If you pass the `--wandb` flag you will be prompted for your W&B `API Key`. W&B is a free online logging utility. If you don't have a W&B account, you can create one for free at https://wandb.ai/site. Your key is on this page: https://wandb.ai/settings under \"Danger Zone\" \"API Keys\""
   ]
  },
  {
@ -171,15 +141,8 @@
   },
   "outputs": [],
   "source": [
-    "wandb = \"\";\n",
-    "try:\n",
-    "    if wandb_token.value:\n",
-    "        wandb=\"--wandb\"\n",
-    "except NameError:\n",
-    "    print(\"W&B not configured... skipping.\")\n",
-    "\n",
-    "%run train.py --config train.json {wandb} \\\n",
-    "--resume_ckpt \"{ckpt_name}\" \\\n",
+    "%run train.py --config train.json \\\n",
+    "--resume_ckpt \"sd_v1-5_vae\" \\\n",
    "--project_name \"sd1_mymodel\" \\\n",
    "--data_root \"input\" \\\n",
    "--max_epochs 200 \\\n",
--- a/doc/CLOUD_SETUP.md
+++ b/doc/CLOUD_SETUP.md
@ -1,14 +1,30 @@
 # RunPod
 The simplest approach for RunPod is to use the [EveryDream2 template](https://runpod.io/gsc?template=d1v63jb36t&ref=bbp9dh8x) to load a fully configured docker image.

-When opening JupyterLab [Port 8888] you will be prompted for a password. The default password is `EveryDream`.
+## JupyterLab
+JupyterLab will run on the pod by default. When opening JupyterLab `[Port 8888]` you will be prompted for a password. The default password is `EveryDream`. This can be changed by editing the pod's environment variables.

-![Selecting the RunPod template](runpod_config.png)
+## SSH
+You can also [enable full SSH support](https://www.runpod.io/blog/how-to-achieve-true-ssh-on-runpod) by setting the `PUBLIC_KEY` environment variable.
+
+## Tensorboard
+Tensorboard will run automatically, and can be viewed on `[Port 6006]`

 # Vast
-The RunPod EveryDream2 docker image is also compatible with [vast.ai](https://console.vast.ai/), and does not require a JupyterLab password.
+The EveryDream2 docker image is also compatible with [vast.ai](https://console.vast.ai/).

-![Selecting the RunPod docker image](vastai_config.png)
+`ghcr.io/victorchall/everydream2trainer:main`

+## JupyterLab
+You can enable JupyterLab as part of the Vast.ai instance configuration. No JupyterLab password is required. 
+
+## Tensorboard
+You can specify tensorboard to run at startup as part of your instance config.
+
+Open the tensorboard port via docker 
+```tensorboard --logdir /workspace/EveryDream2trainer/logs --host 0.0.0.0 &```
+
+![Config](vastai_config.jpg)
+![Config](vastai_ports.jpg)
 # Once your instance is up and running
-Run the `EveryDream2trainer/Train_JupyterLab.ipynb` notebook
+Run the `Train_JupyterLab.ipynb` notebook
--- a/doc/runpod_config.jpg
+++ b/doc/runpod_config.jpg
--- a/doc/runpod_config.png
+++ b/doc/runpod_config.png
--- a/doc/vastai_config.jpg
+++ b/doc/vastai_config.jpg
--- a/doc/vastai_config.png
+++ b/doc/vastai_config.png
--- a/doc/vastai_ports.jpg
+++ b/doc/vastai_ports.jpg
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@ -24,7 +24,7 @@ ENV VIRTUAL_ENV=/workspace/venv
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"


-ADD docker/requirements.txt /build
+ADD requirements.txt /build
 RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m venv ${VIRTUAL_ENV} && \
    pip install -U -I torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url "https://download.pytorch.org/whl/cu117" && \
@ -71,16 +71,22 @@ RUN wget https://github.com/runpod/runpodctl/releases/download/v1.9.0/runpodctl-
 ENV VIRTUAL_ENV=/workspace/venv
 ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+RUN echo "source ${VIRTUAL_ENV}/bin/activate" >> /root/.bashrc

 # Workaround for:
 #   https://github.com/TimDettmers/bitsandbytes/issues/62
 #   https://github.com/TimDettmers/bitsandbytes/issues/73
 ENV LD_LIBRARY_PATH="/usr/local/cuda-11.7/targets/x86_64-linux/lib"
 RUN ln /usr/local/cuda-11.7/targets/x86_64-linux/lib/libcudart.so.11.0 /usr/local/cuda-11.7/targets/x86_64-linux/lib/libcudart.so
+RUN pip install bitsandbytes==0.37.0

 WORKDIR /workspace
+RUN git clone https://github.com/victorchall/EveryDream2trainer
+WORKDIR /workspace/EveryDream2trainer
+RUN python utils/get_yamls.py && \
+    mkdir -p logs && mkdir -p input

-ADD docker/welcome.txt /
-ADD docker/start.sh /
+ADD welcome.txt /
+ADD start.sh /
 RUN chmod +x /start.sh
-CMD [ "/start.sh" ]
+CMD [ "/start.sh" ]
--- a/docker/start.sh
+++ b/docker/start.sh
@ -2,38 +2,6 @@
 cat /welcome.txt
 export PYTHONUNBUFFERED=1

-echo "source /workspace/venv/bin/activate" >> ~/.bashrc
-source ~/.bashrc
-
-# Workaround for:
-#   https://github.com/TimDettmers/bitsandbytes/issues/62
-#   https://github.com/TimDettmers/bitsandbytes/issues/73
-pip install bitsandbytes==0.37.0
-
-function clone_pull {
-  DIRECTORY=$(basename "$1" .git)
-  if [ -d "$DIRECTORY" ]; then
-    cd "$DIRECTORY"
-    git pull
-    cd ../
-  else
-    git clone "$1"
-  fi
-}
-
-
-# VSCode Dev Container
-if [[ $LOCAL_DEV ]]
-then
-  echo "Running in dev container, skipping git pull"
-else
-  clone_pull https://github.com/victorchall/EveryDream2trainer
-fi
-cd /workspace/EveryDream2trainer
-python utils/get_yamls.py
-mkdir -p /workspace/EveryDream2trainer/logs
-mkdir -p /workspace/EveryDream2trainer/input
-
 # RunPod SSH
 if [[ -v "PUBLIC_KEY" ]] && [[ ! -d "${HOME}/.ssh" ]]
 then
--- a/installers/Runpod.ipynb
+++ b/installers/Runpod.ipynb