diff --git a/AutoCaption.ipynb b/AutoCaption.ipynb index 7461362..92ee84e 100644 --- a/AutoCaption.ipynb +++ b/AutoCaption.ipynb @@ -1 +1 @@ -{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":929,"status":"ok","timestamp":1667184580032,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"lWGx2LuU8Q_I","outputId":"d0eb4d03-f16d-460b-981d-d5f88447e85e"},"outputs":[{"name":"stdout","output_type":"stream","text":["Cloning into 'EveryDream'...\n","remote: Enumerating objects: 90, done.\u001b[K\n","remote: Counting objects: 100% (90/90), done.\u001b[K\n","remote: Compressing objects: 100% (59/59), done.\u001b[K\n","remote: Total 90 (delta 30), reused 76 (delta 18), pack-reused 0\u001b[K\n","Unpacking objects: 100% (90/90), done.\n"]}],"source":["#download repo\n","!git clone https://github.com/victorchall/EveryDream.git"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4944,"status":"ok","timestamp":1667184754992,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"RJxfSai-8pkD","outputId":"0ac1b805-62a0-48aa-e0da-ee19503bb3f1"},"outputs":[{"name":"stdout","output_type":"stream","text":["Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n","\u001b[31mERROR: Could not find a version that satisfies the requirement pandas>=1.4.3 (from versions: 0.1, 0.2, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.4.3, 0.5.0, 0.6.0, 0.6.1, 0.7.0, 0.7.1, 0.7.2, 0.7.3, 0.8.0, 0.8.1, 0.9.0, 0.9.1, 0.10.0, 0.10.1, 0.11.0, 0.12.0, 0.13.0, 0.13.1, 0.14.0, 0.14.1, 0.15.0, 0.15.1, 0.15.2, 0.16.0, 0.16.1, 0.16.2, 0.17.0, 0.17.1, 0.18.0, 0.18.1, 0.19.0, 0.19.1, 0.19.2, 0.20.0, 0.20.1, 0.20.2, 0.20.3, 0.21.0, 0.21.1, 0.22.0, 0.23.0, 0.23.1, 0.23.2, 0.23.3, 0.23.4, 0.24.0, 0.24.1, 0.24.2, 0.25.0, 0.25.1, 0.25.2, 0.25.3, 1.0.0, 1.0.1, 1.0.2, 1.0.3, 1.0.4, 1.0.5, 1.1.0, 1.1.1, 1.1.2, 1.1.3, 1.1.4, 1.1.5, 1.2.0, 1.2.1, 1.2.2, 1.2.3, 1.2.4, 1.2.5, 1.3.0, 1.3.1, 1.3.2, 1.3.3, 1.3.4, 1.3.5)\u001b[0m\n","\u001b[31mERROR: No matching distribution found for pandas>=1.4.3\u001b[0m\n","Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/, https://download.pytorch.org/whl/cu113\n","Requirement already satisfied: torch==1.12.1+cu113 in /usr/local/lib/python3.7/dist-packages (1.12.1+cu113)\n","Requirement already satisfied: torchvision==0.13.1+cu113 in /usr/local/lib/python3.7/dist-packages (0.13.1+cu113)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.12.1+cu113) (4.1.1)\n","Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.7/dist-packages (from torchvision==0.13.1+cu113) (7.1.2)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from torchvision==0.13.1+cu113) (1.21.6)\n","Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from torchvision==0.13.1+cu113) (2.23.0)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->torchvision==0.13.1+cu113) (3.0.4)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->torchvision==0.13.1+cu113) (2022.9.24)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from 
requests->torchvision==0.13.1+cu113) (2.10)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->torchvision==0.13.1+cu113) (1.24.3)\n","fatal: destination path 'scripts/BLIP' already exists and is not an empty directory.\n"]}],"source":["!pip install -r EveryDream/requirements.txt\n","!pip install torch=='1.12.1+cu113 torchvision==0.13.1+cu113' --extra-index-url https://download.pytorch.org/whl/cu113\n","!pip install pandas>='1.3.5'\n","!git clone https://github.com/salesforce/BLIP EveryDream/scripts/BLIP\n","!pip install timm\n","!pip install fairscale=='0.4.4'\n","!pip install transformers='4.19.2'\n","!pip install timm\n","# pandas will fail, fixed below"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":383,"status":"ok","timestamp":1667185773878,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"ruRaJ7Cx9vhw","outputId":"f0701d3e-bfa9-45a9-a742-c3615466aad7"},"outputs":[{"name":"stdout","output_type":"stream","text":["mkdir: cannot create directory ‘EveryDream/input’: File exists\n","mkdir: cannot create directory ‘EveryDream/output’: File exists\n"]}],"source":["# make folders for input and output\n","!mkdir EveryDream/input\n","!mkdir EveryDream/output\n","!mkdir .cache"]},{"cell_type":"markdown","metadata":{"id":"sbeUIVXJ-EVf"},"source":["Add your input images into the content/EveryDream/input folder\n","---\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":18221,"status":"ok","timestamp":1667185808005,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"4TAICahl-RPn","outputId":"da7fa1a8-0855-403a-c295-4da31658d1f6"},"outputs":[{"name":"stdout","output_type":"stream","text":["starting\n","Unix detected, using default asyncio event loop policy\n","starting\n","input_dir: /content/EveryDream/input\n","Downloading model to /content/EveryDream/.cache/model_base_caption_capfilt_large.pth... 
please wait\n","Model cached to: /content/EveryDream/.cache/model_base_caption_capfilt_large.pth\n","Downloading: 100% 226k/226k [00:00<00:00, 678kB/s]\n","Downloading: 100% 28.0/28.0 [00:00<00:00, 34.0kB/s]\n","Downloading: 100% 570/570 [00:00<00:00, 675kB/s]\n","load checkpoint from /content/EveryDream/.cache/model_base_caption_capfilt_large.pth\n","loading model to cuda\n","Traceback (most recent call last):\n"," File \"EveryDream/scripts/auto_caption.py\", line 178, in \n"," asyncio.run(main(opt))\n"," File \"/usr/lib/python3.7/asyncio/runners.py\", line 43, in run\n"," return loop.run_until_complete(main)\n"," File \"/usr/lib/python3.7/asyncio/base_events.py\", line 587, in run_until_complete\n"," return future.result()\n"," File \"EveryDream/scripts/auto_caption.py\", line 110, in main\n"," blip_decoder = blip_decoder.to(torch.device(\"cuda\"))\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 927, in to\n"," return self._apply(convert)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 579, in _apply\n"," module._apply(fn)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 579, in _apply\n"," module._apply(fn)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 579, in _apply\n"," module._apply(fn)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 602, in _apply\n"," param_applied = fn(param)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 925, in convert\n"," return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/cuda/__init__.py\", line 217, in _lazy_init\n"," torch._C._cuda_init()\n","RuntimeError: No CUDA GPUs are available\n"]}],"source":["!python EveryDream/scripts/auto_caption.py \\\n","--img_dir EveryDream/input \\\n","--out_dir EveryDream/output \\\n","#--min_length 34 \\ # optional longer prompts\n","#--q_factor 1.3 \\ # optional tweak for longer prompts\n","#--nucleus \\ # alternative algorithm for short captions"]},{"cell_type":"markdown","metadata":{"id":"HBrWnu1C_lN9"},"source":["Download your captioned images from /content/EveryDream/output"]}],"metadata":{"colab":{"authorship_tag":"ABX9TyN9ZSr0RyOQKdfeVsl2uOiE","collapsed_sections":[],"provenance":[{"file_id":"16QrivRfoDFvE7fAa7eLeVlxj78Q573E0","timestamp":1667185879409}]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0} +{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":929,"status":"ok","timestamp":1667184580032,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"lWGx2LuU8Q_I","outputId":"d0eb4d03-f16d-460b-981d-d5f88447e85e"},"outputs":[{"name":"stdout","output_type":"stream","text":["Cloning into 'EveryDream'...\n","remote: Enumerating objects: 90, done.\u001b[K\n","remote: Counting objects: 100% (90/90), done.\u001b[K\n","remote: Compressing objects: 100% (59/59), done.\u001b[K\n","remote: Total 90 (delta 30), reused 76 (delta 18), pack-reused 0\u001b[K\n","Unpacking objects: 100% (90/90), done.\n"]}],"source":["#download repo\n","!git clone https://github.com/victorchall/EveryDream.git\n","%cd 
EveryDream"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":4944,"status":"ok","timestamp":1667184754992,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"RJxfSai-8pkD","outputId":"0ac1b805-62a0-48aa-e0da-ee19503bb3f1"},"outputs":[{"ename":"","evalue":"","output_type":"error","traceback":["\u001b[1;31mRunning cells with 'Python 3.10.5 ('.venv': venv)' requires ipykernel package.\n","\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. \n","\u001b[1;31mCommand: 'r:/EveryDream/.venv/Scripts/python.exe -m pip install ipykernel -U --force-reinstall'"]}],"source":["!pip install torch=='1.12.1+cu113' 'torchvision==0.13.1+cu113' --extra-index-url https://download.pytorch.org/whl/cu113\n","!pip install pandas>='1.3.5'\n","!git clone https://github.com/salesforce/BLIP EveryDream/scripts/BLIP\n","!pip install timm\n","!pip install fairscale=='0.4.4'\n","!pip install transformers=='4.19.2'\n","!pip install timm\n","# pandas will fail, fixed below"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":383,"status":"ok","timestamp":1667185773878,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"ruRaJ7Cx9vhw","outputId":"f0701d3e-bfa9-45a9-a742-c3615466aad7"},"outputs":[{"name":"stdout","output_type":"stream","text":["mkdir: cannot create directory ‘EveryDream/input’: File exists\n","mkdir: cannot create directory ‘EveryDream/output’: File exists\n"]}],"source":["# make folders for input and output\n","!mkdir input\n","!mkdir output\n","!mkdir .cache"]},{"cell_type":"markdown","metadata":{"id":"sbeUIVXJ-EVf"},"source":["Add your input images into the content/EveryDream/input folder\n","![a](/doc/upload_images_caption.png)\n","![Beam vs Nucleus](../demo/beam_vs_nucleus.webp)"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["#import sys\n","#import os\n","#blip_path = os.path.join(os.getcwd(), \"EveryDream/scripts/BLIP\")\n","#sys.path.append(blip_path)"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":18221,"status":"ok","timestamp":1667185808005,"user":{"displayName":"Victor Hall","userId":"00029068894644207946"},"user_tz":240},"id":"4TAICahl-RPn","outputId":"da7fa1a8-0855-403a-c295-4da31658d1f6"},"outputs":[{"name":"stdout","output_type":"stream","text":["starting\n","Unix detected, using default asyncio event loop policy\n","starting\n","input_dir: /content/EveryDream/input\n","Downloading model to /content/EveryDream/.cache/model_base_caption_capfilt_large.pth... 
please wait\n","Model cached to: /content/EveryDream/.cache/model_base_caption_capfilt_large.pth\n","Downloading: 100% 226k/226k [00:00<00:00, 678kB/s]\n","Downloading: 100% 28.0/28.0 [00:00<00:00, 34.0kB/s]\n","Downloading: 100% 570/570 [00:00<00:00, 675kB/s]\n","load checkpoint from /content/EveryDream/.cache/model_base_caption_capfilt_large.pth\n","loading model to cuda\n","Traceback (most recent call last):\n"," File \"EveryDream/scripts/auto_caption.py\", line 178, in \n"," asyncio.run(main(opt))\n"," File \"/usr/lib/python3.7/asyncio/runners.py\", line 43, in run\n"," return loop.run_until_complete(main)\n"," File \"/usr/lib/python3.7/asyncio/base_events.py\", line 587, in run_until_complete\n"," return future.result()\n"," File \"EveryDream/scripts/auto_caption.py\", line 110, in main\n"," blip_decoder = blip_decoder.to(torch.device(\"cuda\"))\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 927, in to\n"," return self._apply(convert)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 579, in _apply\n"," module._apply(fn)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 579, in _apply\n"," module._apply(fn)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 579, in _apply\n"," module._apply(fn)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 602, in _apply\n"," param_applied = fn(param)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py\", line 925, in convert\n"," return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)\n"," File \"/usr/local/lib/python3.7/dist-packages/torch/cuda/__init__.py\", line 217, in _lazy_init\n"," torch._C._cuda_init()\n","RuntimeError: No CUDA GPUs are available\n"]}],"source":["!python EveryDream/scripts/auto_caption.py \\\n","--img_dir EveryDream/input \\\n","--out_dir EveryDream/output \\\n","#--min_length 34 \\ # optional longer prompts\n","#--q_factor 1.3 \\ # optional tweak for longer prompts\n","#--nucleus \\ # alternative algorithm for short captions"]},{"cell_type":"markdown","metadata":{"id":"HBrWnu1C_lN9"},"source":["Download your captioned images from /content/EveryDream/output"]}],"metadata":{"colab":{"authorship_tag":"ABX9TyN9ZSr0RyOQKdfeVsl2uOiE","collapsed_sections":[],"provenance":[{"file_id":"16QrivRfoDFvE7fAa7eLeVlxj78Q573E0","timestamp":1667185879409}]},"kernelspec":{"display_name":"Python 3.10.5 ('.venv': venv)","language":"python","name":"python3"},"language_info":{"name":"python","version":"3.10.5"},"vscode":{"interpreter":{"hash":"faf4a6abb601e3a9195ce3e9620411ceec233a951446de834cdf28542d2d93b4"}}},"nbformat":4,"nbformat_minor":0} diff --git a/doc/upload_images_caption.png b/doc/upload_images_caption.png new file mode 100644 index 0000000..2950191 Binary files /dev/null and b/doc/upload_images_caption.png differ diff --git a/scripts/auto_caption.py b/scripts/auto_caption.py index 6dd1e08..e452ade 100644 --- a/scripts/auto_caption.py +++ b/scripts/auto_caption.py @@ -60,7 +60,7 @@ def get_parser(**parser_kwargs): const=True, default=24, help="adjusts the likelihood of a word being repeated", - ) + ), return parser @@ -82,27 +82,26 @@ async def main(opt): if opt.nucleus: sample = True - input_dir = os.path.join(os.getcwd(), opt.img_dir) + input_dir = opt.img_dir print("input_dir: ", input_dir) - config_path = os.path.join(os.getcwd(), "scripts/BLIP/configs/med_config.json") + config_path = 
"scripts/BLIP/configs/med_config.json" model_cache_path = ".cache/model_base_caption_capfilt_large.pth" - model_path = os.path.join(os.getcwd(), model_cache_path) - if not os.path.exists(model_path): - print(f"Downloading model to {model_path}... please wait") + if not os.path.exists(model_cache_path): + print(f"Downloading model to {model_cache_path}... please wait") blip_model_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_base_caption_capfilt_large.pth' async with aiohttp.ClientSession() as session: async with session.get(blip_model_url) as res: result = await res.read() - with open(model_path, 'wb') as f: + with open(model_cache_path, 'wb') as f: f.write(result) - print(f"Model cached to: {model_path}") + print(f"Model cached to: {model_cache_path}") else: - print(f"Model already cached to: {model_path}") + print(f"Model already cached to: {model_cache_path}") - blip_decoder = models.blip.blip_decoder(pretrained=model_path, image_size=384, vit='base', med_config=config_path) + blip_decoder = models.blip.blip_decoder(pretrained=model_cache_path, image_size=384, vit='base', med_config=config_path) blip_decoder.eval() print("loading model to cuda") @@ -159,7 +158,7 @@ def isWindows(): return sys.platform.startswith("win") if __name__ == "__main__": - print("starting") + print(f"starting in {print(os.getcwd())}") parser = get_parser() opt = parser.parse_args() @@ -172,7 +171,7 @@ if __name__ == "__main__": else: print("Unix detected, using default asyncio event loop policy") - blip_path = os.path.join(os.getcwd(), "scripts/BLIP") + blip_path = "scripts/BLIP" sys.path.append(blip_path) asyncio.run(main(opt))