Merge branch 'main' into lora-internal
commit 224455f389
@@ -51,16 +51,19 @@ jobs:
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true

      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      - name: Tailscale
        uses: huggingface/tailscale-action@main
        with:
          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
          slackChannel: ${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}
          slackToken: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v2.0.0
        with:
          install: true
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v2
@@ -121,6 +124,7 @@ jobs:
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ matrix.label }}
          tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
          network: host
          cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache${{ matrix.label }},mode=min
          cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache${{ matrix.label }},mode=min
      - name: Set up Python
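A note on the cache settings above: with BuildKit's registry cache exporter, `mode=min` exports only the layers of the final image to the cache ref, not those of intermediate build stages, and keeping one cache ref per `matrix.label` lets each build variant reuse its own layers across runs.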
@@ -139,3 +143,8 @@ jobs:
          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          pytest -s -vv integration-tests
+      - name: Tailscale Wait
+        if: ${{ failure() || runner.debug == '1' }}
+        uses: huggingface/tailscale-action@main
+        with:
+          waitForSSH: true
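For context: `runner.debug` is set to `'1'` only when a workflow run has debug logging enabled, so the added Tailscale Wait step keeps the runner reachable over SSH only after a failure or on an explicit debug re-run.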
@@ -33,9 +33,9 @@ jobs:
      - name: Install Rust
        uses: actions-rs/toolchain@v1
        with:
-          # Released on: 02 May, 2024
-          # https://releases.rs/docs/1.78.0/
-          toolchain: 1.78.0
+          # Released on: June 13, 2024
+          # https://releases.rs/docs/1.79.0/
+          toolchain: 1.79.0
          override: true
          components: rustfmt, clippy
      - name: Install Protoc
@@ -0,0 +1,133 @@

# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances of
  any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
feedback@huggingface.co.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations
@@ -0,0 +1,120 @@
<!---
Copyright 2024 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

# Contribute to text-generation-inference

Everyone is welcome to contribute, and we value everybody's contribution. Code
contributions are not the only way to help the community. Answering questions, helping
others, and improving the documentation are also immensely valuable.

It also helps us if you spread the word! Reference the library in blog posts
about the awesome projects it made possible, shout out on Twitter every time it has
helped you, or simply ⭐️ the repository to say thank you.

However you choose to contribute, please be mindful and respect our
[code of conduct](https://github.com/huggingface/text-generation-inference/blob/main/CODE_OF_CONDUCT.md).

**This guide was heavily inspired by the awesome [scikit-learn guide to contributing](https://github.com/scikit-learn/scikit-learn/blob/main/CONTRIBUTING.md).**

## Ways to contribute

There are several ways you can contribute to text-generation-inference.

* Fix outstanding issues with the existing code.
* Submit issues related to bugs or desired new features.
* Contribute to the examples or to the documentation.

> All contributions are equally valuable to the community. 🥰

## Fixing outstanding issues

If you notice an issue with the existing code and have a fix in mind, feel free to [start contributing](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request) and open
a Pull Request!

## Submitting a bug-related issue or feature request

Do your best to follow these guidelines when submitting a bug-related issue or a feature
request. It will make it easier for us to come back to you quickly and with good
feedback.

### Did you find a bug?

The text-generation-inference library is robust and reliable thanks to users who report the problems they encounter.

Before you report an issue, we would really appreciate it if you could **make sure the bug was not
already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the
library itself, and not your code.

Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so
we can quickly resolve it:

* Your **OS type and version**, as well as your environment versions (versions of rust, python, and dependencies).
* A short, self-contained code snippet that allows us to reproduce the bug.
* The *full* traceback if an exception is raised.
* Attach any other additional information, like screenshots, you think may help.

To get the OS and software versions automatically, you can re-run the launcher with the `--env` flag:

```bash
text-generation-launcher --env
```

This will precede the launch of the model with the information relative to your environment. We recommend pasting
that in your issue report.

### Do you want a new feature?

If there is a new feature you'd like to see in text-generation-inference, please open an issue and describe:

1. What is the *motivation* behind this feature? Is it related to a problem or frustration with the library? Is it
   a feature related to something you need for a project? Is it something you worked on and think it could benefit
   the community?

   Whatever it is, we'd love to hear about it!

2. Describe your requested feature in as much detail as possible. The more you can tell us about it, the better
   we'll be able to help you.
3. Provide a *code snippet* that demonstrates the feature's usage.
4. If the feature is related to a paper, please include a link.

If your issue is well written, we're already 80% of the way there by the time you create it.

We have added [templates](https://github.com/huggingface/text-generation-inference/tree/main/.github/ISSUE_TEMPLATE)
to help you get started with your issue.

## Do you want to implement a new model?

New models are constantly released, and if you want to implement a new model, please provide the following information:

* A short description of the model and a link to the paper.
* A link to the implementation if it is open-sourced.
* A link to the model weights if they are available.

If you are willing to contribute the model yourself, let us know so we can help you add it to text-generation-inference!

## Do you want to add documentation?

We're always looking for improvements to the documentation that make it more clear and accurate. Please let us know
how the documentation can be improved, such as typos or any content that is missing, unclear, or inaccurate. We'll be
happy to make the changes or help you make a contribution if you're interested!

## I want to become a maintainer of the project. How do I get there?

TGI is a project led and managed by Hugging Face as it powers our internal services. However, we are happy to have
motivated individuals from other organizations join us as maintainers with the goal of making TGI the best inference
service.

If you are such an individual (or organization), please reach out to us and let's collaborate.
@@ -1,5 +1,5 @@
# Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -1,5 +1,5 @@
# Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -1,4 +1,4 @@
-FROM lukemathwalker/cargo-chef:latest-rust-1.78 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -497,7 +497,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
                "Lowest: {:.2} {unit}",
                data.iter()
                    .min_by(|a, b| a.total_cmp(b))
-                    .unwrap_or(&std::f64::NAN)
+                    .unwrap_or(&f64::NAN)
            ),
            Style::default().fg(Color::Reset),
        )]),
@@ -506,7 +506,7 @@ fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
                "Highest: {:.2} {unit}",
                data.iter()
                    .max_by(|a, b| a.total_cmp(b))
-                    .unwrap_or(&std::f64::NAN)
+                    .unwrap_or(&f64::NAN)
            ),
            Style::default().fg(Color::Reset),
        )]),
@@ -555,17 +555,17 @@ fn latency_throughput_chart<'a>(
    let min_latency: f64 = *latency_iter
        .clone()
        .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
    let max_latency: f64 = *latency_iter
        .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
    let min_throughput: f64 = *throughput_iter
        .clone()
        .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
    let max_throughput: f64 = *throughput_iter
        .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);

    // Char min max values
    let min_x = if zoom {
@@ -156,17 +156,17 @@ fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
    let min = data
        .iter()
        .min_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
    let max = data
        .iter()
        .max_by(|a, b| a.total_cmp(b))
-        .unwrap_or(&std::f64::NAN);
+        .unwrap_or(&f64::NAN);
    (average, *min, *max)
}

fn px(data: &[f64], p: u32) -> f64 {
    let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
-    *data.get(i).unwrap_or(&std::f64::NAN)
+    *data.get(i).unwrap_or(&f64::NAN)
}

fn format_value(value: f64, unit: &'static str) -> String {
@@ -37,7 +37,7 @@ pub(crate) fn percentiles(values: &[f64], pecents: &[i32]) -> BTreeMap<String, f64> {
        .iter()
        .map(|&p| {
            let i = (f64::from(p) / 100.0 * values.len() as f64) as usize;
-            (format!("p{p}"), *values.get(i).unwrap_or(&std::f64::NAN))
+            (format!("p{p}"), *values.get(i).unwrap_or(&f64::NAN))
        })
        .collect()
}
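The `std::f64::NAN` to `f64::NAN` changes throughout the benchmark code are mechanical: the associated constant has been available since Rust 1.43 and is the form newer clippy lints steer toward, which is presumably why these fixes accompany the 1.79 toolchain bump below.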
@@ -0,0 +1,61 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "eos_token",
    "generated_tokens": 8,
    "prefill": [],
    "seed": null,
    "tokens": [
      {
        "id": 2502,
        "logprob": -1.734375,
        "special": false,
        "text": "image"
      },
      {
        "id": 2196,
        "logprob": -0.5756836,
        "special": false,
        "text": " result"
      },
      {
        "id": 604,
        "logprob": -0.007843018,
        "special": false,
        "text": " for"
      },
      {
        "id": 12254,
        "logprob": -1.7167969,
        "special": false,
        "text": " chicken"
      },
      {
        "id": 611,
        "logprob": -0.17053223,
        "special": false,
        "text": " on"
      },
      {
        "id": 573,
        "logprob": -0.7626953,
        "special": false,
        "text": " the"
      },
      {
        "id": 8318,
        "logprob": -0.02709961,
        "special": false,
        "text": " beach"
      },
      {
        "id": 1,
        "logprob": -0.20739746,
        "special": true,
        "text": "<eos>"
      }
    ],
    "top_tokens": null
  },
  "generated_text": "image result for chicken on the beach"
}
@@ -0,0 +1,85 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "eos_token",
    "generated_tokens": 12,
    "prefill": [],
    "seed": null,
    "tokens": [
      {
        "id": 450,
        "logprob": -0.26342773,
        "special": false,
        "text": " The"
      },
      {
        "id": 21282,
        "logprob": -0.01838684,
        "special": false,
        "text": " cow"
      },
      {
        "id": 322,
        "logprob": -0.18041992,
        "special": false,
        "text": " and"
      },
      {
        "id": 521,
        "logprob": -0.62841797,
        "special": false,
        "text": " ch"
      },
      {
        "id": 21475,
        "logprob": -0.0037956238,
        "special": false,
        "text": "icken"
      },
      {
        "id": 526,
        "logprob": -0.018737793,
        "special": false,
        "text": " are"
      },
      {
        "id": 373,
        "logprob": -1.0820312,
        "special": false,
        "text": " on"
      },
      {
        "id": 263,
        "logprob": -0.5083008,
        "special": false,
        "text": " a"
      },
      {
        "id": 25695,
        "logprob": -0.07128906,
        "special": false,
        "text": " beach"
      },
      {
        "id": 29889,
        "logprob": -0.12573242,
        "special": false,
        "text": "."
      },
      {
        "id": 32002,
        "logprob": -0.0029792786,
        "special": true,
        "text": "<end_of_utterance>"
      },
      {
        "id": 2,
        "logprob": -0.00024962425,
        "special": true,
        "text": "</s>"
      }
    ],
    "top_tokens": null
  },
  "generated_text": " The cow and chicken are on a beach."
}
@@ -0,0 +1,133 @@
{
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 20,
    "prefill": [],
    "seed": null,
    "tokens": [
      {
        "id": 415,
        "logprob": -0.04421997,
        "special": false,
        "text": " The"
      },
      {
        "id": 12072,
        "logprob": -0.13500977,
        "special": false,
        "text": " cow"
      },
      {
        "id": 349,
        "logprob": -0.06750488,
        "special": false,
        "text": " is"
      },
      {
        "id": 6328,
        "logprob": -0.6352539,
        "special": false,
        "text": " standing"
      },
      {
        "id": 356,
        "logprob": -0.16186523,
        "special": false,
        "text": " on"
      },
      {
        "id": 272,
        "logprob": -0.5078125,
        "special": false,
        "text": " the"
      },
      {
        "id": 10305,
        "logprob": -0.017913818,
        "special": false,
        "text": " beach"
      },
      {
        "id": 304,
        "logprob": -1.5205078,
        "special": false,
        "text": " and"
      },
      {
        "id": 272,
        "logprob": -0.029174805,
        "special": false,
        "text": " the"
      },
      {
        "id": 13088,
        "logprob": -0.003479004,
        "special": false,
        "text": " chicken"
      },
      {
        "id": 349,
        "logprob": -0.0035095215,
        "special": false,
        "text": " is"
      },
      {
        "id": 6398,
        "logprob": -0.3088379,
        "special": false,
        "text": " sitting"
      },
      {
        "id": 356,
        "logprob": -0.027755737,
        "special": false,
        "text": " on"
      },
      {
        "id": 264,
        "logprob": -0.31884766,
        "special": false,
        "text": " a"
      },
      {
        "id": 17972,
        "logprob": -0.047943115,
        "special": false,
        "text": " pile"
      },
      {
        "id": 302,
        "logprob": -0.0002925396,
        "special": false,
        "text": " of"
      },
      {
        "id": 2445,
        "logprob": -0.02935791,
        "special": false,
        "text": " money"
      },
      {
        "id": 28723,
        "logprob": -0.031219482,
        "special": false,
        "text": "."
      },
      {
        "id": 32002,
        "logprob": -0.00034475327,
        "special": true,
        "text": "<end_of_utterance>"
      },
      {
        "id": 2,
        "logprob": -1.1920929e-07,
        "special": true,
        "text": "</s>"
      }
    ],
    "top_tokens": null
  },
  "generated_text": " The cow is standing on the beach and the chicken is sitting on a pile of money."
}
@@ -22,6 +22,12 @@ async def flash_pali_gemma(flash_pali_gemma_handle):
    return flash_pali_gemma_handle.client


def get_chicken():
    with open("integration-tests/images/chicken_on_money.png", "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read())
    return f"data:image/png;base64,{encoded_string.decode('utf-8')}"


def get_cow_beach():
    with open("integration-tests/images/cow_beach.png", "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read())
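These helpers inline test images as base64 data URIs so the prompts need no network access. A minimal standalone sketch of the same pattern (the path is illustrative):

```python
import base64

def to_data_uri(path: str) -> str:
    # Read the PNG and wrap it as a data URI that can be embedded in a prompt.
    with open(path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")
    return f"data:image/png;base64,{encoded}"

# Markdown-style image reference, matching the prompt format in the tests below.
prompt = f"caption![]({to_data_uri('integration-tests/images/cow_beach.png')})\n"
```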
@@ -37,3 +43,20 @@ async def test_flash_pali_gemma(flash_pali_gemma, response_snapshot):

    assert response.generated_text == "beach"
    assert response == response_snapshot


+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_pali_gemma_two_images(flash_pali_gemma, response_snapshot):
+    chicken = get_chicken()
+    cow_beach = get_cow_beach()
+    response = await flash_pali_gemma.generate(
+        f"caption![]({chicken})![]({cow_beach})\n",
+        max_new_tokens=20,
+    )
+    # Is PaliGemma not able to handle two separate images? At least we
+    # get output showing that both images are used.
+    assert (
+        response.generated_text == "image result for chicken on the beach"
+    ), f"{repr(response.generated_text)}"
+    assert response == response_snapshot
@@ -23,6 +23,12 @@ def get_chicken():
    return f"data:image/png;base64,{encoded_string.decode('utf-8')}"


+def get_cow_beach():
+    with open("integration-tests/images/cow_beach.png", "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read())
+    return f"data:image/png;base64,{encoded_string.decode('utf-8')}"


@pytest.mark.asyncio
async def test_idefics(idefics, response_snapshot):
    chicken = get_chicken()
@@ -39,6 +45,21 @@ async def test_idefics(idefics, response_snapshot):
    assert response == response_snapshot


+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_idefics_two_images(idefics, response_snapshot):
+    chicken = get_chicken()
+    cow_beach = get_cow_beach()
+    response = await idefics.generate(
+        f"User:![]({chicken})![]({cow_beach})Where are the cow and chicken?<end_of_utterance> \nAssistant:",
+        max_new_tokens=20,
+    )
+    assert (
+        response.generated_text == " The cow and chicken are on a beach."
+    ), f"{repr(response.generated_text)}"
+    assert response == response_snapshot


@pytest.mark.asyncio
async def test_idefics_load(idefics, generate_load, response_snapshot):
    chicken = get_chicken()
@@ -9,6 +9,12 @@ def get_chicken():
    return f"data:image/png;base64,{encoded_string.decode('utf-8')}"


+def get_cow_beach():
+    with open("integration-tests/images/cow_beach.png", "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read())
+    return f"data:image/png;base64,{encoded_string.decode('utf-8')}"


@pytest.fixture(scope="module")
def flash_idefics2_next_handle(launcher):
    with launcher(
@@ -38,6 +44,23 @@ async def test_flash_idefics2_next_simple(flash_idefics2_next, response_snapshot):
    assert response == response_snapshot


+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_idefics2_two_images(flash_idefics2_next, response_snapshot):
+    chicken = get_chicken()
+    cow_beach = get_cow_beach()
+    response = await flash_idefics2_next.generate(
+        f"User:![]({chicken})![]({cow_beach})Where are the cow and chicken?<end_of_utterance> \nAssistant:",
+        max_new_tokens=20,
+    )
+    assert (
+        response.generated_text
+        == " The cow is standing on the beach and the chicken is sitting on a pile of money."
+    ), f"{repr(response.generated_text)}"
+    assert response.details.generated_tokens == 20
+    assert response == response_snapshot


@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_idefics2_next_all_params(flash_idefics2_next, response_snapshot):
@@ -1,7 +1,7 @@
use std::fs;

fn main() -> Result<(), Box<dyn std::error::Error>> {
-    println!("cargo:rerun-if-changed=../../proto/**");
+    println!("cargo:rerun-if-changed=../../proto/");

    fs::create_dir_all("src/v2/pb").unwrap_or(());
    let mut config = prost_build::Config::new();
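Cargo does not expand glob patterns in `rerun-if-changed` values, so `../../proto/**` was treated as a literal path; pointing at the directory itself makes Cargo track every file under `proto/` for changes, which appears to be the motivation here.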
@@ -1,5 +1,5 @@
[toolchain]
-# Released on: 02 May, 2024
-# https://releases.rs/docs/1.78.0/
-channel = "1.78.0"
+# Released on: June 13, 2024
+# https://releases.rs/docs/1.79.0/
+channel = "1.79.0"
components = ["rustfmt", "clippy"]
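With the channel pinned here, rustup installs and selects 1.79.0 automatically for any cargo invocation inside the repository, keeping local builds aligned with the CI workflow and Dockerfile versions bumped above.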
@@ -53,7 +53,9 @@ def image_text_replacement(image_input, config, image_id) -> str:
        num_features = get_number_of_features(height, width, config)
        from loguru import logger

-        logger.info(f"Found {num_features} in image of resolution {height}x{width}")
+        logger.info(
+            f"Found {num_features} features in image of resolution {height}x{width}"
+        )
        return "<image>" * num_features

    elif config.model_type == "paligemma":
@@ -133,23 +135,41 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
    def batch_tokenized_inputs(
        cls, requests: Iterable[generate_pb2.Request], tokenizer, processor, config
    ):
+        # Process images first. We need all of them so that the processor
+        # can make the image splits the same size. And we need the final
+        # sizes to insert correct number of image tokens.
+        images = []
+        for r in requests:
+            for chunk in r.input_chunks.chunks:
+                chunk_type = chunk.WhichOneof("chunk")
+                if chunk_type == "text":
+                    pass
+                elif chunk_type == "image":
+                    image = Image.open(BytesIO(chunk.image.data))
+                    if config.model_type == "llava_next":
+                        images.append(image)
+                    else:
+                        images.append([image])
+                else:
+                    raise RuntimeError(f"Invalid chunk type {chunk_type}")
+
+        if images:
+            image_inputs = processor.image_processor(images, return_tensors="pt")
+        else:
+            image_inputs = None
+
        batch_inputs = []
-        image_inputs = []
        max_truncation = 0
+        image_id = 0
        for r in requests:
            full_text = ""
-            image_id = 0
            for chunk in r.input_chunks.chunks:
                chunk_type = chunk.WhichOneof("chunk")
                if chunk_type == "text":
                    full_text += chunk.text
                elif chunk_type == "image":
-                    image = Image.open(BytesIO(chunk.image.data))
-                    image_input = processor.image_processor(image, return_tensors="pt")
-                    full_text += image_text_replacement(image_input, config, image_id)
-                    image_inputs.append(image_input)
-                else:
-                    raise RuntimeError(f"Invalid chunk type {chunk_type}")
+                    full_text += image_text_replacement(image_inputs, config, image_id)
+                    image_id += 1

            batch_inputs.append(full_text)
            max_truncation = max(max_truncation, r.truncate)
@@ -160,24 +180,7 @@ class VlmCausalLMBatch(FlashCausalLMBatch):
            max_length=max_truncation,
            add_special_tokens=not config.model_type == "paligemma",
        )["input_ids"]
-        if image_inputs:
-            image_input = image_inputs[0]
-            new_image_inputs = {
-                "pixel_values": torch.cat(
-                    [img["pixel_values"] for img in image_inputs], dim=0
-                ),
-            }
-            if "pixel_attention_mask" in image_input:
-                new_image_inputs["pixel_attention_mask"] = torch.cat(
-                    [img["pixel_attention_mask"] for img in image_inputs], dim=0
-                )
-            if "image_sizes" in image_input:
-                new_image_inputs["image_sizes"] = torch.cat(
-                    [img["image_sizes"] for img in image_inputs], dim=0
-                )
-            image_inputs = new_image_inputs
-        else:
-            image_inputs = None
-
        return batch_tokenized_inputs, image_inputs

    @classmethod
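The deleted block concatenated per-image processor outputs, which only works when every image yields tensors of identical shape; running all images through a single `image_processor` call, as the new code above does, lets the processor make the image splits the same size first. A shape-level sketch of the removed pattern (sizes are illustrative assumptions):

```python
import torch

# One processor output per image; torch.cat requires matching trailing dims.
pixel_values = [torch.randn(1, 3, 336, 336) for _ in range(4)]
batched = torch.cat(pixel_values, dim=0)
assert batched.shape == (4, 3, 336, 336)
```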
|
@ -270,7 +270,11 @@ def serve(
|
|||
interceptors=[
|
||||
ExceptionInterceptor(),
|
||||
UDSOpenTelemetryAioServerInterceptor(),
|
||||
]
|
||||
],
|
||||
options=[
|
||||
# Set the maximum possible message length: i32::MAX
|
||||
("grpc.max_receive_message_length", (1 << 31) - 1)
|
||||
],
|
||||
)
|
||||
generate_pb2_grpc.add_TextGenerationServiceServicer_to_server(
|
||||
TextGenerationService(model, Cache(), quantize, server_urls), server
|
||||
|
|
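The added `options` entry lifts gRPC's default 4 MB receive limit to the i32 maximum. A self-contained sketch of the same setting (port and lifecycle reduced to the essentials):

```python
import asyncio

from grpc import aio

# i32::MAX, the largest message size gRPC accepts: 2_147_483_647 bytes.
MAX_MESSAGE_LENGTH = (1 << 31) - 1

async def main() -> None:
    # Apply the raised receive cap when constructing the asyncio server.
    server = aio.server(
        options=[("grpc.max_receive_message_length", MAX_MESSAGE_LENGTH)],
    )
    server.add_insecure_port("localhost:50051")
    await server.start()
    await server.stop(None)

asyncio.run(main())
```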