From 678638504d7ccbd6303291830fe973a5b0bc339d Mon Sep 17 00:00:00 2001
From: Victor Hall
Date: Mon, 3 Jul 2023 14:27:17 -0400
Subject: [PATCH] flamingo caption notebook beta

---
 CaptionFL.ipynb | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 CaptionFL.ipynb

diff --git a/CaptionFL.ipynb b/CaptionFL.ipynb
new file mode 100644
index 0000000..4642840
--- /dev/null
+++ b/CaptionFL.ipynb
@@ -0,0 +1,109 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# OpenFlamingo Captioning\n",
+    "\n",
+    "1. Read the [docs](doc/CAPTION.md) for a basic usage guide.\n",
+    "2. Open this notebook in [Google Colab](https://colab.research.google.com/github/victorchall/EveryDream2trainer/blob/main/CaptionFL.ipynb) **OR** on RunPod/Vast using the EveryDream2trainer Docker container/template.\n",
+    "3. Run the cells below to install dependencies.\n",
+    "4. Place your images in the \"input\" folder, or change `data_root` to point to a Gdrive folder."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install dependencies\n",
+    "!pip install open-flamingo==2.0.0\n",
+    "!pip install huggingface-hub==0.15.1\n",
+    "!pip install transformers==4.30.2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Colab-only setup (do NOT run for Docker/RunPod/Vast)\n",
+    "!git clone https://github.com/victorchall/EveryDream2trainer\n",
+    "%cd EveryDream2trainer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@markdown Optional: Extract all TAR and ZIP files in the input folder (so you can upload one large TAR/ZIP instead of many individual images)\n",
+    "import os\n",
+    "import zipfile\n",
+    "import tarfile\n",
+    "\n",
+    "# Directory containing the input files\n",
+    "input_folder = \"input\"\n",
+    "\n",
+    "# Extract ZIP files (case-insensitive extension match)\n",
+    "for file in os.listdir(input_folder):\n",
+    "    if file.lower().endswith(\".zip\"):\n",
+    "        file_path = os.path.join(input_folder, file)\n",
+    "        with zipfile.ZipFile(file_path, 'r') as zip_ref:\n",
+    "            zip_ref.extractall(input_folder)\n",
+    "\n",
+    "# Extract TAR files; mode 'r' transparently handles gzip/bz2 compression\n",
+    "for file in os.listdir(input_folder):\n",
+    "    if file.lower().endswith((\".tar\", \".tar.gz\", \".tgz\")):\n",
+    "        file_path = os.path.join(input_folder, file)\n",
+    "        with tarfile.open(file_path, 'r') as tar_ref:\n",
+    "            tar_ref.extractall(input_folder)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run captioning\n",
+    "\n",
+    "Make sure your images are in the \"input\" folder, or change `data_root` to point to a Gdrive folder.\n",
+    "\n",
+    "Run whichever cell below matches your GPU: the 9B model needs roughly 24GB of VRAM, the 3B model roughly 16GB. Adjust the settings on your own if you like."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 24GB GPU: OpenFlamingo 9B model\n",
+    "%run caption_fl.py --data_root \"input\" --min_new_tokens 20 --max_new_tokens 30 --num_beams 3 --model \"openflamingo/OpenFlamingo-9B-vitl-mpt7b\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 16GB GPU: OpenFlamingo 3B model\n",
+    "%run caption_fl.py --data_root \"input\" --min_new_tokens 20 --max_new_tokens 30 --num_beams 8 --model \"openflamingo/OpenFlamingo-3B-vitl-mpt1b\""
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
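
For context on what `caption_fl.py` is doing under the hood, below is a minimal single-image captioning sketch using the open-flamingo 2.0 API that the notebook installs. This is not taken from the patch itself: the encoder paths, `cross_attn_every_n_layers` value, and the `"<image>An image of"` prompt follow the OpenFlamingo-3B-vitl-mpt1b model card and the open-flamingo README, and the image filename is hypothetical.

```python
# Sketch: caption one image with open-flamingo 2.0 (assumes the 3B checkpoint;
# encoder paths per its model card, not this patch).
import torch
from PIL import Image
from open_flamingo import create_model_and_transforms
from huggingface_hub import hf_hub_download

# Build the Flamingo wrapper around a CLIP ViT-L/14 vision encoder
# and an MPT-1B language model.
model, image_processor, tokenizer = create_model_and_transforms(
    clip_vision_encoder_path="ViT-L-14",
    clip_vision_encoder_pretrained="openai",
    lang_encoder_path="anas-awadalla/mpt-1b-redpajama-200b",
    tokenizer_path="anas-awadalla/mpt-1b-redpajama-200b",
    cross_attn_every_n_layers=1,
)

# Download and load the trained cross-attention weights.
ckpt = hf_hub_download("openflamingo/OpenFlamingo-3B-vitl-mpt1b", "checkpoint.pt")
model.load_state_dict(torch.load(ckpt, map_location="cpu"), strict=False)
model = model.to("cuda").eval()

# vision_x shape: (batch, num_media, num_frames, channels, height, width).
image = Image.open("input/example.jpg").convert("RGB")  # hypothetical file
vision_x = image_processor(image).unsqueeze(0).unsqueeze(0).unsqueeze(0).to("cuda")

# The "<image>" token marks where the image feeds into the prompt.
tokenizer.padding_side = "left"
lang_x = tokenizer(["<image>An image of"], return_tensors="pt").to("cuda")

generated = model.generate(
    vision_x=vision_x,
    lang_x=lang_x["input_ids"],
    attention_mask=lang_x["attention_mask"],
    min_new_tokens=20,  # force reasonably descriptive captions
    max_new_tokens=30,
    num_beams=8,        # beam search: more beams, better captions, more VRAM/time
)
print(tokenizer.decode(generated[0], skip_special_tokens=True))
```

Beam count trades caption quality against VRAM and runtime, which is presumably why the notebook's 3B cell can afford `--num_beams 8` while the 9B cell drops to `--num_beams 3` on a 24GB card.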