diff --git a/caption_kosmos2.py b/caption_kosmos2.py index 2576004..f55a1fc 100644 --- a/caption_kosmos2.py +++ b/caption_kosmos2.py @@ -103,7 +103,7 @@ def main(args): print(f"File: {full_file_path}, Generated caption: {processed_text}") name = os.path.splitext(full_file_path)[0] - if not os.path.exists(f"{name}.txt") or args.overwrite and not args.save_entities_only: + if (not os.path.exists(f"{name}.txt") or args.overwrite) and not args.save_entities_only: with open(f"{name}.txt", "w") as f: f.write(processed_text) diff --git a/doc/CAPTION.md b/doc/CAPTION.md index 9b46ec9..8b1896d 100644 --- a/doc/CAPTION.md +++ b/doc/CAPTION.md @@ -6,7 +6,7 @@ ## Kosmos-2 -Microsoft's [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224) is significantly lighter weight than Cog, using <5GB of VRAM and generating captions in under 1/21 second on a RTX 3090. +Microsoft's [Kosmos-2](https://huggingface.co/microsoft/kosmos-2-patch14-224) is significantly lighter weight than Cog, using <5GB of VRAM and generating captions in under a second on an RTX 3090. It has the capability to output grounding bounding boxes.