From 678638504d7ccbd6303291830fe973a5b0bc339d Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 3 Jul 2023 14:27:17 -0400
Subject: [PATCH] flamingo caption notebook beta

---
 CaptionFL.ipynb | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 CaptionFL.ipynb

diff --git a/CaptionFL.ipynb b/CaptionFL.ipynb
new file mode 100644
index 0000000..4642840
--- /dev/null
+++ b/CaptionFL.ipynb
@@ -0,0 +1,109 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# OpenFlamingo Captioning\n",
+    "\n",
+    "1. Read the [docs](doc/CAPTION.md) for a basic usage guide.\n",
+    "2. Open this notebook in [Google Colab](https://colab.research.google.com/github/victorchall/EveryDream2trainer/blob/main/CaptionFL.ipynb) **OR** on RunPod/Vast using the EveryDream2trainer Docker container/template.\n",
+    "3. Run the cells below to install dependencies.\n",
+    "4. Place your images in the \"input\" folder, or change `data_root` to point to a Gdrive folder."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install dependencies\n",
+    "!pip install open-flamingo==2.0.0\n",
+    "!pip install huggingface-hub==0.15.1\n",
+    "!pip install transformers==4.30.2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Colab-only setup (do NOT run for Docker/RunPod/Vast)\n",
+    "!git clone https://github.com/victorchall/EveryDream2trainer\n",
+    "%cd EveryDream2trainer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@markdown Optional: Extract all TAR and ZIP files in the input folder (so you can upload one large TAR/ZIP instead of many individual images)\n",
+    "import os\n",
+    "import zipfile\n",
+    "import tarfile\n",
+    "\n",
+    "# Directory containing the input files\n",
+    "input_folder = \"input\"\n",
+    "\n",
+    "# Extract ZIP files (case-insensitive extension match)\n",
+    "for file in os.listdir(input_folder):\n",
+    "    if file.lower().endswith(\".zip\"):\n",
+    "        file_path = os.path.join(input_folder, file)\n",
+    "        with zipfile.ZipFile(file_path, 'r') as zip_ref:\n",
+    "            zip_ref.extractall(input_folder)\n",
+    "\n",
+    "# Extract TAR files; mode 'r' transparently handles gzip/bz2 compression\n",
+    "for file in os.listdir(input_folder):\n",
+    "    if file.lower().endswith((\".tar\", \".tar.gz\", \".tgz\")):\n",
+    "        file_path = os.path.join(input_folder, file)\n",
+    "        with tarfile.open(file_path, 'r') as tar_ref:\n",
+    "            tar_ref.extractall(input_folder)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run captioning\n",
+    "\n",
+    "Make sure your images are in the \"input\" folder, or change `data_root` to point to a Gdrive folder.\n",
+    "\n",
+    "Run whichever cell below matches your GPU: the 9B model needs roughly 24GB of VRAM, the 3B model roughly 16GB. Adjust the settings on your own if you like."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 24GB GPU: OpenFlamingo 9B model\n",
+    "%run caption_fl.py --data_root \"input\" --min_new_tokens 20 --max_new_tokens 30 --num_beams 3 --model \"openflamingo/OpenFlamingo-9B-vitl-mpt7b\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 16GB GPU: OpenFlamingo 3B model\n",
+    "%run caption_fl.py --data_root \"input\" --min_new_tokens 20 --max_new_tokens 30 --num_beams 8 --model \"openflamingo/OpenFlamingo-3B-vitl-mpt1b\""
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
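
For context on what `caption_fl.py` is doing under the hood, below is a minimal single-image captioning sketch using the open-flamingo 2.0 API that the notebook installs. This is not taken from the patch itself: the encoder paths, `cross_attn_every_n_layers` value, and the `"<image>An image of"` prompt follow the OpenFlamingo-3B-vitl-mpt1b model card and the open-flamingo README, and the image filename is hypothetical.

```python
# Sketch: caption one image with open-flamingo 2.0 (assumes the 3B checkpoint;
# encoder paths per its model card, not this patch).
import torch
from PIL import Image
from open_flamingo import create_model_and_transforms
from huggingface_hub import hf_hub_download

# Build the Flamingo wrapper around a CLIP ViT-L/14 vision encoder
# and an MPT-1B language model.
model, image_processor, tokenizer = create_model_and_transforms(
    clip_vision_encoder_path="ViT-L-14",
    clip_vision_encoder_pretrained="openai",
    lang_encoder_path="anas-awadalla/mpt-1b-redpajama-200b",
    tokenizer_path="anas-awadalla/mpt-1b-redpajama-200b",
    cross_attn_every_n_layers=1,
)

# Download and load the trained cross-attention weights.
ckpt = hf_hub_download("openflamingo/OpenFlamingo-3B-vitl-mpt1b", "checkpoint.pt")
model.load_state_dict(torch.load(ckpt, map_location="cpu"), strict=False)
model = model.to("cuda").eval()

# vision_x shape: (batch, num_media, num_frames, channels, height, width).
image = Image.open("input/example.jpg").convert("RGB")  # hypothetical file
vision_x = image_processor(image).unsqueeze(0).unsqueeze(0).unsqueeze(0).to("cuda")

# The "<image>" token marks where the image feeds into the prompt.
tokenizer.padding_side = "left"
lang_x = tokenizer(["<image>An image of"], return_tensors="pt").to("cuda")

generated = model.generate(
    vision_x=vision_x,
    lang_x=lang_x["input_ids"],
    attention_mask=lang_x["attention_mask"],
    min_new_tokens=20,  # force reasonably descriptive captions
    max_new_tokens=30,
    num_beams=8,        # beam search: more beams, better captions, more VRAM/time
)
print(tokenizer.decode(generated[0], skip_special_tokens=True))
```

Beam count trades caption quality against VRAM and runtime, which is presumably why the notebook's 3B cell can afford `--num_beams 8` while the 9B cell drops to `--num_beams 3` on a 24GB card.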