diff --git a/server/text_generation_server/models/custom_modeling/idefics_image_processing.py b/server/text_generation_server/models/custom_modeling/idefics_image_processing.py index 21aa3ff3..4760ae6f 100644 --- a/server/text_generation_server/models/custom_modeling/idefics_image_processing.py +++ b/server/text_generation_server/models/custom_modeling/idefics_image_processing.py @@ -201,8 +201,13 @@ class IdeficsImageProcessor(BaseImageProcessor): response = requests.get(image_url_or_urls, stream=True, headers=headers, timeout=(1, 5)) response.raise_for_status() content = response.content - else: + elif image.startswith("data:"): + # https://stackoverflow.com/questions/17090571/is-there-a-way-to-set-background-image-as-a-base64-encoded-image + # data:image/png;base64,xxx + image = image.split(",")[-1] content = base64.b64decode(image) + else: + raise ValueError(f"Unrecognized image {image}") try: image = Image.open(BytesIO(content)) diff --git a/server/text_generation_server/models/custom_modeling/idefics_processing.py b/server/text_generation_server/models/custom_modeling/idefics_processing.py index 0fbcbeeb..98e43a27 100644 --- a/server/text_generation_server/models/custom_modeling/idefics_processing.py +++ b/server/text_generation_server/models/custom_modeling/idefics_processing.py @@ -112,6 +112,11 @@ def is_url(string): result = urlparse(string) return all([result.scheme, result.netloc]) +def is_image(string): + """Checks if the passed string refers to an image: either a valid url (as decided by is_url) or
a string that starts with "data:" + (only that prefix is checked here; the payload after it is not validated)""" + return is_url(string) or string.startswith("data:") + class IdeficsProcessor(ProcessorMixin): r""" @@ -314,7 +319,7 @@ class IdeficsProcessor(ProcessorMixin): if isinstance(item, str): item = item.strip(" ") - if is_url(item): + if is_image(item): image = self.image_processor.fetch_images(item) full_text += image_tokens(last_was_image) image_objects.append(image) @@ -339,6 +344,7 @@ class IdeficsProcessor(ProcessorMixin): image_objects = self.image_processor(image_objects, transform=transform) + text_encoding = self.tokenizer( text=full_text, add_special_tokens=False,