From 91a9b3d898d12e842125b668df5c5f69be0d6a4e Mon Sep 17 00:00:00 2001 From: Victor Hall Date: Sun, 3 Mar 2024 15:47:44 -0500 Subject: [PATCH] bugfix and doc fix --- caption_kosmos2.py | 2 +- doc/CAPTION.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/caption_kosmos2.py b/caption_kosmos2.py index 2576004..f55a1fc 100644 --- a/caption_kosmos2.py +++ b/caption_kosmos2.py @@ -103,7 +103,7 @@ def main(args): print(f"File: {full_file_path}, Generated caption: {processed_text}") name = os.path.splitext(full_file_path)[0] - if not os.path.exists(f"{name}.txt") or args.overwrite and not args.save_entities_only: + if (not os.path.exists(f"{name}.txt") or args.overwrite) and not args.save_entities_only: with open(f"{name}.txt", "w") as f: f.write(processed_text) diff --git a/doc/CAPTION.md b/doc/CAPTION.md index 9b46ec9..8b1896d 100644 --- a/doc/CAPTION.md +++ b/doc/CAPTION.md @@ -6,7 +6,7 @@ ## Kosmos-2 -Microsoft's [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224) is significantly lighter weight than Cog, using <5GB of VRAM and generating captions in under 1/21 second on a RTX 3090. +Microsoft's [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224) is significantly lighter weight than Cog, using <5GB of VRAM and generating captions in under a second on a RTX 3090. It has the capability to output grounding bounding boxes.