trying a cog colab notebook, and remove flamingo notebook since it is removed from repo

2024-03-03 16:13:58 -05:00 · 2024-03-03 16:13:58 -05:00 · bd0acb57fd
parent fdf230634e
commit bd0acb57fd
1 changed files with 24 additions and 24 deletions
--- a/CaptionCog.ipynb
+++ b/CaptionCog.ipynb
@ -5,13 +5,13 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "# Open-flamingo Captioning\n",
-    "This notebook is an implementation of [OpenFlamingo](https://github.com/mlfoundations/open_flamingo) for image captioning. \n",
+    "# Cog Captioning\n",
+    "This notebook is an implementation of [CogVLM](https://github.com/THUDM/CogVLM) for image captioning. \n",
    "\n",
-    "This will require HIGH RAM shape on Google Colab, but T4 16gb is enough to run the 3B model.  9B model requires 24GB GPU or better.\n",
+    "This may require the High-RAM runtime shape on Google Colab, but a T4 GPU (16 GB) is enough, even if slow.\n",
    "\n",
-    "1.  Read [Docs](doc/CAPTION.md) for basic usage guide. \n",
-    "2.  Open in [Google Colab](https://colab.research.google.com/github/victorchall/EveryDream2trainer/blob/main/CaptionFL.ipynb) **OR** Runpod/Vast using the EveryDream2trainer docker container/template and open this notebook.\n",
+    "1.  Read [Docs](doc/CAPTION_COG.md) for basic usage guide. \n",
+    "2.  Open in [Google Colab](https://colab.research.google.com/github/victorchall/EveryDream2trainer/blob/main/CaptionCog.ipynb) **OR** Runpod/Vast using the EveryDream2trainer docker container/template and open this notebook.\n",
    "3.  Run the cells below to install dependencies.\n",
    "4.  Place your images in the \"input\" folder or change the `--image_dir` argument to point to a Google Drive folder."
   ]
@ -23,9 +23,8 @@
   "outputs": [],
   "source": [
    "# install dependencies\n",
-    "!pip install open-flamingo==2.0.0\n",
-    "!pip install huggingface-hub==0.15.1\n",
-    "!pip install transformers==4.30.2\n",
+    "!pip install huggingface-hub\n",
+    "!pip install transformers\n",
    "!pip install pynvml\n",
    "!pip install colorama"
   ]
@ -90,9 +89,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# 24GB GPU, 9b model\n",
+    "# 16GB GPU, must not use more than 1 beam\n",
+    "# 24GB GPU, can use 3 beams\n",
    "%cd /content/EveryDream2trainer\n",
-    "%run caption_fl.py --data_root \"input\" --min_new_tokens 20 --max_new_tokens 30 --num_beams 3 --model \"openflamingo/OpenFlamingo-9B-vitl-mpt7b\""
+    "%run caption_cog.py --image_dir \"input\" --num_beams 1 --prompt \"Write a description.\""
   ]
  },
  {
@ -101,28 +101,28 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# 16GB GPU, 3b model\n",
+    "# This is a fancier version of the above with more options set\n",
    "%cd /content/EveryDream2trainer\n",
-    "%run caption_fl.py --data_root \"input\" --min_new_tokens 20 --max_new_tokens 30 --num_beams 8 --model \"openflamingo/OpenFlamingo-3B-vitl-mpt1b\""
+    "%run caption_cog.py --image_dir \"input\" --num_beams 1 --prompt \"Write a description.\" --starts_with \"An image of\" --remove_starts_with --temp 0.9 --top_p 0.9 --top_k 40 --bad_words \"depicts,showcases,appears,suggests\""
   ]
  }
 ],
 "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "machine_shape": "hm",
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
  "language_info": {
   "name": "python"
  },
-  "orig_nbformat": 4,
-  "colab": {
-   "provenance": [],
-   "machine_shape": "hm",
-   "gpuType": "T4"
- },
-  "kernelspec": {
-   "name": "python3",
-   "display_name": "Python 3"
-  },
-  "accelerator": "GPU"
+  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 0
-}
+}