From e903770897ae80f9b9ea02ba02eac4c680fd6202 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Mon, 17 Jun 2024 10:49:41 +0200
Subject: [PATCH 1/6] Support different image sizes in prefill in VLMs (#2065)

When a batch contained images of different sizes during prefill, the
server would fail (see e.g. #2056). Images were processed separately and
then concatenated. However, this can fail for images with different
sizes.

Fix this by preprocessing all images in the batch together, so that the
image processor can ensure that all image tensors have compatible sizes.
---
 .../test_flash_pali_gemma_two_images.json     |  61 ++++++++
 .../test_idefics/test_idefics_two_images.json |  85 +++++++++++
 .../test_flash_idefics2_two_images.json       | 133 ++++++++++++++++++
 .../models/test_flash_pali_gemma.py           |  23 +++
 integration-tests/models/test_idefics.py      |  21 +++
 integration-tests/models/test_idefics2.py     |  23 +++
 .../models/vlm_causal_lm.py                   |  57 ++++----
 7 files changed, 376 insertions(+), 27 deletions(-)
 create mode 100644 integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma_two_images.json
 create mode 100644 integration-tests/models/__snapshots__/test_idefics/test_idefics_two_images.json
 create mode 100644 integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json

diff --git a/integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma_two_images.json b/integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma_two_images.json
new file mode 100644
index 00000000..ab4f3015
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_pali_gemma/test_flash_pali_gemma_two_images.json
@@ -0,0 +1,61 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 8,
+    "prefill": [],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 2502,
+        "logprob": -1.734375,
+        "special": false,
+        "text": "image"
+      },
+      {
+        "id": 2196,
+        "logprob": -0.5756836,
+        "special": false,
+        "text": " result"
+      },
+      {
+        "id": 604,
+        "logprob": -0.007843018,
+        "special": false,
+        "text": " for"
+      },
+      {
+        "id": 12254,
+        "logprob": -1.7167969,
+        "special": false,
+        "text": " chicken"
+      },
+      {
+        "id": 611,
+        "logprob": -0.17053223,
+        "special": false,
+        "text": " on"
+      },
+      {
+        "id": 573,
+        "logprob": -0.7626953,
+        "special": false,
+        "text": " the"
+      },
+      {
+        "id": 8318,
+        "logprob": -0.02709961,
+        "special": false,
+        "text": " beach"
+      },
+      {
+        "id": 1,
+        "logprob": -0.20739746,
+        "special": true,
+        "text": "<eos>"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "image result for chicken on the beach"
+}
diff --git a/integration-tests/models/__snapshots__/test_idefics/test_idefics_two_images.json b/integration-tests/models/__snapshots__/test_idefics/test_idefics_two_images.json
new file mode 100644
index 00000000..a4727707
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_idefics/test_idefics_two_images.json
@@ -0,0 +1,85 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 12,
+    "prefill": [],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 450,
+        "logprob": -0.26342773,
+        "special": false,
+        "text": " The"
+      },
+      {
+        "id": 21282,
+        "logprob": -0.01838684,
+        "special": false,
+        "text": " cow"
+      },
+      {
+        "id": 322,
+        "logprob": -0.18041992,
+        "special": false,
+        "text": " and"
+      },
+      {
+        "id": 521,
+        "logprob": -0.62841797,
+        "special": false,
+        "text": " ch"
+      },
+      {
+        "id": 21475,
+        "logprob": -0.0037956238,
+        "special": false,
+        "text": "icken"
+      },
+      {
+        "id": 526,
+        "logprob": -0.018737793,
+        "special": false,
+        "text": " are"
+      },
+      {
+        "id": 373,
+        "logprob": -1.0820312,
+        "special": false,
+        "text": " on"
+      },
+      {
+        "id": 263,
+        "logprob": -0.5083008,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 25695,
+        "logprob": -0.07128906,
+        "special": false,
+        "text": " beach"
+      },
+      {
+        "id": 29889,
+        "logprob": -0.12573242,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 32002,
+        "logprob": -0.0029792786,
+        "special": true,
+        "text": "<end_of_utterance>"
+      },
+      {
+        "id": 2,
+        "logprob": -0.00024962425,
+        "special": true,
+        "text": "</s>"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": " The cow and chicken are on a beach."
+}
diff --git a/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json
new file mode 100644
index 00000000..86c95b29
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_idefics2/test_flash_idefics2_two_images.json
@@ -0,0 +1,133 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 20,
+    "prefill": [],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 415,
+        "logprob": -0.04421997,
+        "special": false,
+        "text": " The"
+      },
+      {
+        "id": 12072,
+        "logprob": -0.13500977,
+        "special": false,
+        "text": " cow"
+      },
+      {
+        "id": 349,
+        "logprob": -0.06750488,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 6328,
+        "logprob": -0.6352539,
+        "special": false,
+        "text": " standing"
+      },
+      {
+        "id": 356,
+        "logprob": -0.16186523,
+        "special": false,
+        "text": " on"
+      },
+      {
+        "id": 272,
+        "logprob": -0.5078125,
+        "special": false,
+        "text": " the"
+      },
+      {
+        "id": 10305,
+        "logprob": -0.017913818,
+        "special": false,
+        "text": " beach"
+      },
+      {
+        "id": 304,
+        "logprob": -1.5205078,
+        "special": false,
+        "text": " and"
+      },
+      {
+        "id": 272,
+        "logprob": -0.029174805,
+        "special": false,
+        "text": " the"
+      },
+      {
+        "id": 13088,
+        "logprob": -0.003479004,
+        "special": false,
+        "text": " chicken"
+      },
+      {
+        "id": 349,
+        "logprob": -0.0035095215,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 6398,
+        "logprob": -0.3088379,
+        "special": false,
+        "text": " sitting"
+      },
+      {
+        "id": 356,
+        "logprob": -0.027755737,
+        "special": false,
+        "text": " on"
+      },
+      {
+        "id": 264,
+        "logprob": -0.31884766,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 17972,
+        "logprob": -0.047943115,
+        "special": false,
+        "text": " pile"
+      },
+      {
+        "id": 302,
+        "logprob": -0.0002925396,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 2445,
+        "logprob": -0.02935791,
+        "special": false,
+        "text": " money"
+      },
+      {
+        "id": 28723,
+        "logprob": -0.031219482,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 32002,
+        "logprob": -0.00034475327,
+        "special": true,
+        "text": "<end_of_utterance>"
+      },
+      {
+        "id": 2,
+        "logprob": -1.1920929e-07,
+        "special": true,
+        "text": "</s>"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": " The cow is standing on the beach and the chicken is sitting on a pile of money."
+} diff --git a/integration-tests/models/test_flash_pali_gemma.py b/integration-tests/models/test_flash_pali_gemma.py index d4e83c9f..6be1750c 100644 --- a/integration-tests/models/test_flash_pali_gemma.py +++ b/integration-tests/models/test_flash_pali_gemma.py @@ -22,6 +22,12 @@ async def flash_pali_gemma(flash_pali_gemma_handle): return flash_pali_gemma_handle.client +def get_chicken(): + with open("integration-tests/images/chicken_on_money.png", "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()) + return f"data:image/png;base64,{encoded_string.decode('utf-8')}" + + def get_cow_beach(): with open("integration-tests/images/cow_beach.png", "rb") as image_file: encoded_string = base64.b64encode(image_file.read()) @@ -37,3 +43,20 @@ async def test_flash_pali_gemma(flash_pali_gemma, response_snapshot): assert response.generated_text == "beach" assert response == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_pali_gemma_two_images(flash_pali_gemma, response_snapshot): + chicken = get_chicken() + cow_beach = get_cow_beach() + response = await flash_pali_gemma.generate( + f"caption![]({chicken})![]({cow_beach})\n", + max_new_tokens=20, + ) + # Is PaliGemma not able to handle two separate images? At least we + # get output showing that both images are used. + assert ( + response.generated_text == "image result for chicken on the beach" + ), f"{repr(response.generated_text)}" + assert response == response_snapshot diff --git a/integration-tests/models/test_idefics.py b/integration-tests/models/test_idefics.py index aeeaffa1..ac807b76 100644 --- a/integration-tests/models/test_idefics.py +++ b/integration-tests/models/test_idefics.py @@ -23,6 +23,12 @@ def get_chicken(): return f"data:image/png;base64,{encoded_string.decode('utf-8')}" +def get_cow_beach(): + with open("integration-tests/images/cow_beach.png", "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()) + return f"data:image/png;base64,{encoded_string.decode('utf-8')}" + + @pytest.mark.asyncio async def test_idefics(idefics, response_snapshot): chicken = get_chicken() @@ -39,6 +45,21 @@ async def test_idefics(idefics, response_snapshot): assert response == response_snapshot +@pytest.mark.asyncio +@pytest.mark.private +async def test_idefics_two_images(idefics, response_snapshot): + chicken = get_chicken() + cow_beach = get_cow_beach() + response = await idefics.generate( + f"User:![]({chicken})![]({cow_beach})Where are the cow and chicken? \nAssistant:", + max_new_tokens=20, + ) + assert ( + response.generated_text == " The cow and chicken are on a beach." 
+ ), f"{repr(response.generated_text)}" + assert response == response_snapshot + + @pytest.mark.asyncio async def test_idefics_load(idefics, generate_load, response_snapshot): chicken = get_chicken() diff --git a/integration-tests/models/test_idefics2.py b/integration-tests/models/test_idefics2.py index d34cce34..9aaf6d8a 100644 --- a/integration-tests/models/test_idefics2.py +++ b/integration-tests/models/test_idefics2.py @@ -9,6 +9,12 @@ def get_chicken(): return f"data:image/png;base64,{encoded_string.decode('utf-8')}" +def get_cow_beach(): + with open("integration-tests/images/cow_beach.png", "rb") as image_file: + encoded_string = base64.b64encode(image_file.read()) + return f"data:image/png;base64,{encoded_string.decode('utf-8')}" + + @pytest.fixture(scope="module") def flash_idefics2_next_handle(launcher): with launcher( @@ -38,6 +44,23 @@ async def test_flash_idefics2_next_simple(flash_idefics2_next, response_snapshot assert response == response_snapshot +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_idefics2_two_images(flash_idefics2_next, response_snapshot): + chicken = get_chicken() + cow_beach = get_cow_beach() + response = await flash_idefics2_next.generate( + f"User:![]({chicken})![]({cow_beach})Where are the cow and chicken? \nAssistant:", + max_new_tokens=20, + ) + assert ( + response.generated_text + == " The cow is standing on the beach and the chicken is sitting on a pile of money." + ), f"{repr(response.generated_text)}" + assert response.details.generated_tokens == 20 + assert response == response_snapshot + + @pytest.mark.asyncio @pytest.mark.private async def test_flash_idefics2_next_all_params(flash_idefics2_next, response_snapshot): diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py index 59a6fab1..8b5819d1 100644 --- a/server/text_generation_server/models/vlm_causal_lm.py +++ b/server/text_generation_server/models/vlm_causal_lm.py @@ -53,7 +53,9 @@ def image_text_replacement(image_input, config, image_id) -> str: num_features = get_number_of_features(height, width, config) from loguru import logger - logger.info(f"Found {num_features} in image of resolution {height}x{width}") + logger.info( + f"Found {num_features} features in image of resolution {height}x{width}" + ) return "" * num_features elif config.model_type == "paligemma": @@ -133,23 +135,41 @@ class VlmCausalLMBatch(FlashCausalLMBatch): def batch_tokenized_inputs( cls, requests: Iterable[generate_pb2.Request], tokenizer, processor, config ): + # Process images first. We need all of them so that the processor + # can make the image splits the same size. And we need the final + # sizes to insert correct number of image tokens. 
+ images = [] + for r in requests: + for chunk in r.input_chunks.chunks: + chunk_type = chunk.WhichOneof("chunk") + if chunk_type == "text": + pass + elif chunk_type == "image": + image = Image.open(BytesIO(chunk.image.data)) + if config.model_type == "llava_next": + images.append(image) + else: + images.append([image]) + else: + raise RuntimeError(f"Invalid chunk type {chunk_type}") + + if images: + image_inputs = processor.image_processor(images, return_tensors="pt") + else: + image_inputs = None + batch_inputs = [] - image_inputs = [] max_truncation = 0 + image_id = 0 for r in requests: full_text = "" - image_id = 0 for chunk in r.input_chunks.chunks: chunk_type = chunk.WhichOneof("chunk") if chunk_type == "text": full_text += chunk.text elif chunk_type == "image": - image = Image.open(BytesIO(chunk.image.data)) - image_input = processor.image_processor(image, return_tensors="pt") - full_text += image_text_replacement(image_input, config, image_id) - image_inputs.append(image_input) - else: - raise RuntimeError(f"Invalid chunk type {chunk_type}") + full_text += image_text_replacement(image_inputs, config, image_id) + image_id += 1 batch_inputs.append(full_text) max_truncation = max(max_truncation, r.truncate) @@ -160,24 +180,7 @@ class VlmCausalLMBatch(FlashCausalLMBatch): max_length=max_truncation, add_special_tokens=not config.model_type == "paligemma", )["input_ids"] - if image_inputs: - image_input = image_inputs[0] - new_image_inputs = { - "pixel_values": torch.cat( - [img["pixel_values"] for img in image_inputs], dim=0 - ), - } - if "pixel_attention_mask" in image_input: - new_image_inputs["pixel_attention_mask"] = torch.cat( - [img["pixel_attention_mask"] for img in image_inputs], dim=0 - ) - if "image_sizes" in image_input: - new_image_inputs["image_sizes"] = torch.cat( - [img["image_sizes"] for img in image_inputs], dim=0 - ) - image_inputs = new_image_inputs - else: - image_inputs = None + return batch_tokenized_inputs, image_inputs @classmethod From 131838919e680f4ed0519786e8fc2a9baf182802 Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Mon, 17 Jun 2024 12:09:31 +0200 Subject: [PATCH 2/6] Contributing guide & Code of Conduct (#2074) * Contributing guide & Code of Conduct * Redirect to GitHub's tutorial on PRs --- CODE_OF_CONDUCT.md | 133 +++++++++++++++++++++++++++++++++++++++++++++ CONTRIBUTING.md | 120 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 CODE_OF_CONDUCT.md create mode 100644 CONTRIBUTING.md diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..ef09fa13 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,133 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +feedback@huggingface.co. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+ +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..39b57c19 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,120 @@ + + +# Contribute to text-generation-inference + +Everyone is welcome to contribute, and we value everybody's contribution. Code +contributions are not the only way to help the community. Answering questions, helping +others, and improving the documentation are also immensely valuable. + +It also helps us if you spread the word! Reference the library in blog posts +about the awesome projects it made possible, shout out on Twitter every time it has +helped you, or simply ⭐️ the repository to say thank you. + +However you choose to contribute, please be mindful and respect our +[code of conduct](https://github.com/huggingface/text-generation-inference/blob/main/CODE_OF_CONDUCT.md). + +**This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md).** + +## Ways to contribute + +There are several ways you can contribute to text-generation-inference. + +* Fix outstanding issues with the existing code. +* Submit issues related to bugs or desired new features. +* Contribute to the examples or to the documentation. + +> All contributions are equally valuable to the community. 🥰 + +## Fixing outstanding issues + +If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) and open +a Pull Request! + +## Submitting a bug-related issue or feature request + +Do your best to follow these guidelines when submitting a bug-related issue or a feature +request. It will make it easier for us to come back to you quickly and with good +feedback. + +### Did you find a bug? 
+ +The text-generation-inference library is robust and reliable thanks to users who report the problems they encounter. + +Before you report an issue, we would really appreciate it if you could **make sure the bug was not +already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the +library itself, and not your code. + +Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so +we can quickly resolve it: + +* Your **OS type and version**, as well as your environment versions (versions of rust, python, and dependencies). +* A short, self-contained, code snippet that allows us to reproduce the bug. +* The *full* traceback if an exception is raised. +* Attach any other additional information, like screenshots, you think may help. + +To get the OS and software versions automatically, you can re-run the launcher with the `--env` flag: + +```bash +text-generation-launcher --env +``` + +This will precede the launch of the model with the information relative to your environment. We recommend pasting +that in your issue report. + +### Do you want a new feature? + +If there is a new feature you'd like to see in text-generation-inference, please open an issue and describe: + +1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it + a feature related to something you need for a project? Is it something you worked on and think it could benefit + the community? + + Whatever it is, we'd love to hear about it! + +2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better + we'll be able to help you. +3. Provide a *code snippet* that demonstrates the feature's usage. +4. If the feature is related to a paper, please include a link. + +If your issue is well written we're already 80% of the way there by the time you create it. + +We have added [templates](https://github.com/huggingface/text-generation-inference/tree/main/.github/ISSUE_TEMPLATE) +to help you get started with your issue. + +## Do you want to implement a new model? + +New models are constantly released and if you want to implement a new model, please provide the following information: + +* A short description of the model and a link to the paper. +* Link to the implementation if it is open-sourced. +* Link to the model weights if they are available. + +If you are willing to contribute the model yourself, let us know so we can help you add it to text-generation-inference! + +## Do you want to add documentation? + +We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know +how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be +happy to make the changes or help you make a contribution if you're interested! + +## I want to become a maintainer of the project. How do I get there? + +TGI is a project led and managed by Hugging Face as it powers our internal services. However, we are happy to have +motivated individuals from other organizations join us as maintainers with the goal of making TGI the best inference +service. + +If you are such an individual (or organization), please reach out to us and let's collaborate. 
\ No newline at end of file
From 0f7d38e774aff78d41bd63c4baaa7a96f9320c0b Mon Sep 17 00:00:00 2001
From: Ziru Niu
Date: Mon, 17 Jun 2024 18:10:01 +0800
Subject: [PATCH 3/6] fix build.rs watch files (#2072)

---
 router/client/build.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/router/client/build.rs b/router/client/build.rs
index a7ade9b0..210cd603 100644
--- a/router/client/build.rs
+++ b/router/client/build.rs
@@ -1,7 +1,7 @@
 use std::fs;
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("cargo:rerun-if-changed=../../proto/**");
+    println!("cargo:rerun-if-changed=../../proto/");
 
     fs::create_dir_all("src/v2/pb").unwrap_or(());
     let mut config = prost_build::Config::new();
From c8c7ccd31e1e760d216c9d2f2b17b0d984ed033b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Mon, 17 Jun 2024 16:40:44 +0200
Subject: [PATCH 4/6] Set maximum grpc message receive size to 2GiB (#2075)

* Set maximum grpc message receive size to 2GiB

  The previous default was 4MiB, which doesn't really work well for
  multi-modal models.

* Update to Rust 1.79.0

* Fixup formatting to make PR pass
---
 .github/workflows/tests.yaml             |  6 +++---
 CODE_OF_CONDUCT.md                       |  2 +-
 CONTRIBUTING.md                          | 22 +++++++++++-----------
 Dockerfile                               |  2 +-
 Dockerfile_amd                           |  2 +-
 Dockerfile_intel                         |  2 +-
 benchmark/src/app.rs                     | 12 ++++++------
 benchmark/src/table.rs                   |  6 +++---
 benchmark/src/utils.rs                   |  2 +-
 rust-toolchain.toml                      |  6 +++---
 server/text_generation_server/server.py |  6 +++++-
 11 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 74479cc6..83fff196 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -33,9 +33,9 @@ jobs:
       - name: Install Rust
         uses: actions-rs/toolchain@v1
         with:
-          # Released on: 02 May, 2024
-          # https://releases.rs/docs/1.78.0/
-          toolchain: 1.78.0
+          # Released on: June 13, 2024
+          # https://releases.rs/docs/1.79.0/
+          toolchain: 1.79.0
           override: true
           components: rustfmt, clippy
       - name: Install Protoc
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index ef09fa13..b23f3150 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -130,4 +130,4 @@ For answers to common questions about this code of conduct, see the FAQ at
 [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
 [Mozilla CoC]: https://github.com/mozilla/diversity
 [FAQ]: https://www.contributor-covenant.org/faq
-[translations]: https://www.contributor-covenant.org/translations
\ No newline at end of file
+[translations]: https://www.contributor-covenant.org/translations
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 39b57c19..d541e47f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -55,10 +55,10 @@ feedback.
 
 The text-generation-inference library is robust and reliable thanks to users who report the problems they encounter.
 
 Before you report an issue, we would really appreciate it if you could **make sure the bug was not
-already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the 
-library itself, and not your code. 
+already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the
+library itself, and not your code.
-Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so +Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it: * Your **OS type and version**, as well as your environment versions (versions of rust, python, and dependencies). @@ -79,20 +79,20 @@ that in your issue report. If there is a new feature you'd like to see in text-generation-inference, please open an issue and describe: -1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it - a feature related to something you need for a project? Is it something you worked on and think it could benefit +1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it + a feature related to something you need for a project? Is it something you worked on and think it could benefit the community? Whatever it is, we'd love to hear about it! -2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better +2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better we'll be able to help you. 3. Provide a *code snippet* that demonstrates the feature's usage. 4. If the feature is related to a paper, please include a link. If your issue is well written we're already 80% of the way there by the time you create it. -We have added [templates](https://github.com/huggingface/text-generation-inference/tree/main/.github/ISSUE_TEMPLATE) +We have added [templates](https://github.com/huggingface/text-generation-inference/tree/main/.github/ISSUE_TEMPLATE) to help you get started with your issue. ## Do you want to implement a new model? @@ -107,14 +107,14 @@ If you are willing to contribute the model yourself, let us know so we can help ## Do you want to add documentation? -We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know -how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be +We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know +how the documentation can be improved such as typos and any content that is missing, unclear or inaccurate. We'll be happy to make the changes or help you make a contribution if you're interested! ## I want to become a maintainer of the project. How do I get there? TGI is a project led and managed by Hugging Face as it powers our internal services. However, we are happy to have motivated individuals from other organizations join us as maintainers with the goal of making TGI the best inference -service. +service. -If you are such an individual (or organization), please reach out to us and let's collaborate. \ No newline at end of file +If you are such an individual (or organization), please reach out to us and let's collaborate. 
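The headline change of this patch lives in `server/text_generation_server/server.py` (see the diffstat above). As a minimal sketch of what such a change looks like with `grpcio`'s asyncio API (illustrative only, not the verbatim TGI hunk), the standard `grpc.max_receive_message_length` channel option raises the limit from the 4 MiB default:

```python
# Hedged sketch: allow the gRPC server to receive messages up to ~2 GiB.
# `grpc.max_receive_message_length` is a standard gRPC channel option; the
# surrounding server construction is illustrative, not TGI's exact code.
from grpc import aio

MAX_RECEIVE_MESSAGE_LENGTH = (1 << 31) - 1  # i32::MAX, just under 2 GiB

server = aio.server(
    options=[("grpc.max_receive_message_length", MAX_RECEIVE_MESSAGE_LENGTH)],
)
```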
diff --git a/Dockerfile b/Dockerfile
index 14628339..c93372a2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
diff --git a/Dockerfile_amd b/Dockerfile_amd
index c79bc03c..55da9204 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
diff --git a/Dockerfile_intel b/Dockerfile_intel
index cb0e84bb..35362fc9 100644
--- a/Dockerfile_intel
+++ b/Dockerfile_intel
@@ -1,4 +1,4 @@
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
diff --git a/benchmark/src/app.rs b/benchmark/src/app.rs
index 48ac976a..a0a9313a 100644
--- a/benchmark/src/app.rs
+++ b/benchmark/src/app.rs
@@ -497,7 +497,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
                 "Lowest: {:.2} {unit}",
                 data.iter()
                     .min_by(|a, b| a.total_cmp(b))
-                    .unwrap_or(&std::f64::NAN)
+                    .unwrap_or(&f64::NAN)
             ),
             Style::default().fg(Color::Reset),
         )]),
@@ -506,7 +506,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
                 "Highest: {:.2} {unit}",
                 data.iter()
                     .max_by(|a, b| a.total_cmp(b))
-                    .unwrap_or(&std::f64::NAN)
+                    .unwrap_or(&f64::NAN)
             ),
             Style::default().fg(Color::Reset),
         )]),
@@ -555,17 +555,17 @@ fn latency_throughput_chart<'a>(
     let min_latency: f64 = *latency_iter
         .clone()
         .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let max_latency: f64 = *latency_iter
         .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let min_throughput: f64 = *throughput_iter
         .clone()
         .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let max_throughput: f64 = *throughput_iter
         .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
 
     // Char min max values
     let min_x = if zoom {
diff --git a/benchmark/src/table.rs b/benchmark/src/table.rs
index e18d7310..1585a25f 100644
--- a/benchmark/src/table.rs
+++ b/benchmark/src/table.rs
@@ -156,17 +156,17 @@ fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
     let min = data
         .iter()
         .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     let max = data
         .iter()
         .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
     (average, *min, *max)
 }
 
 fn px(data: &[f64], p: u32) -> f64 {
     let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
-    *data.get(i).unwrap_or(&std::f64::NAN)
+    *data.get(i).unwrap_or(&f64::NAN)
 }
 
 fn format_value(value: f64, unit: &'static str) -> String {
diff --git a/benchmark/src/utils.rs b/benchmark/src/utils.rs
index d096d655..20469991 100644
--- a/benchmark/src/utils.rs
+++ b/benchmark/src/utils.rs
@@ -37,7 +37,7 @@ pub(crate) fn percentiles(values: &[f64], pecents: &[i32]) -> BTreeMap<String, f64> {
Date: Tue, 18 Jun 2024 09:13:04 +0200
Subject: [PATCH 5/6] CI: Tailscale improvements (#2079)

* test local tailscale

* Update build.yaml

* Update build.yaml

* Update build.yaml

* Update build.yaml

* wait for ssh

* network host

* change step order
---
 .github/workflows/build.yaml | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index e80037b1..ad1377a2 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -51,16 +51,19 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
-      - name: Initialize Docker Buildx
-        uses: docker/setup-buildx-action@v2.0.0
-        with:
-          install: true
+      
       - name: Inject slug/short variables
         uses: rlespinasse/github-slug-action@v4.4.1
       - name: Tailscale
         uses: huggingface/tailscale-action@main
         with:
           authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
+          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
+          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
+      - name: Initialize Docker Buildx
+        uses: docker/setup-buildx-action@v2.0.0
+        with:
+          install: true
       - name: Login to GitHub Container Registry
         if: github.event_name != 'pull_request'
         uses: docker/login-action@v2
@@ -121,6 +124,7 @@ jobs:
           DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ matrix.label }}
         tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
         labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
+        network: host
         cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache${{ matrix.label }},mode=min
         cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache${{ matrix.label }},mode=min
     - name: Set up Python
@@ -139,3 +143,8 @@ jobs:
           export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
           export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
           pytest -s -vv integration-tests
+      - name: Tailscale Wait
+        if: ${{ failure() || runner.debug == '1' }}
+        uses: huggingface/tailscale-action@main
+        with:
+          waitForSSH: true
From 11ea9ce002e796cc59714950b557b4021cbebc58 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?=
Date: Tue, 18 Jun 2024 09:38:21 +0200
Subject: [PATCH 6/6] CI: pass pre-commit hooks again (#2084)

---
 .github/workflows/build.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index ad1377a2..8c407e81 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -51,7 +51,7 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
-      
+
       - name: Inject slug/short variables
         uses: rlespinasse/github-slug-action@v4.4.1
       - name: Tailscale
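To close the series where it started: below is a small, self-contained sketch of the prefill failure fixed by PATCH 1/6 and of the batched preprocessing that avoids it. The model id, image paths, and processor used here are illustrative assumptions, not part of the patches themselves.

```python
# Sketch of the mixed-size prefill failure (#2056) and the fix from PATCH 1/6.
# Assumptions: an Idefics2-style processor; image paths are placeholders.
import torch
from PIL import Image
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
images = [Image.open("chicken_on_money.png"), Image.open("cow_beach.png")]

# Old behaviour: preprocess each image alone, then concatenate. The pixel
# tensors inherit each image's own (padded) resolution, so torch.cat can
# fail when the batch mixes sizes.
per_image = [processor.image_processor(im, return_tensors="pt") for im in images]
try:
    torch.cat([p["pixel_values"] for p in per_image], dim=0)
except RuntimeError as exc:
    print(f"per-image preprocessing breaks on mixed sizes: {exc}")

# New behaviour: one call over the whole batch; the image processor pads
# all images to compatible sizes before stacking them into one tensor.
batched = processor.image_processor(images, return_tensors="pt")
print(batched["pixel_values"].shape)
```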