This commit is contained in:
OlivierDehaene 2023-12-11 14:55:03 +01:00
parent 72ee382ded
commit d0841cc8eb
8 changed files with 78 additions and 78 deletions

134
Cargo.lock generated
View File

@ -40,9 +40,9 @@ dependencies = [
[[package]]
name = "anstream"
version = "0.6.4"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44"
checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
dependencies = [
"anstyle",
"anstyle-parse",
@ -69,9 +69,9 @@ dependencies = [
[[package]]
name = "anstyle-query"
version = "1.0.1"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3a318f1f38d2418400f8209655bfd825785afd25aa30bb7ba6cc792e4596748"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys 0.52.0",
]
@ -128,7 +128,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -139,7 +139,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -362,7 +362,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -813,7 +813,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -994,9 +994,9 @@ dependencies = [
[[package]]
name = "http-body"
version = "0.4.5"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
dependencies = [
"bytes",
"http",
@ -1174,9 +1174,9 @@ dependencies = [
[[package]]
name = "itoa"
version = "1.0.9"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
[[package]]
name = "js-sys"
@ -1195,9 +1195,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.150"
version = "0.2.151"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
[[package]]
name = "libm"
@ -1336,7 +1336,7 @@ checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1387,9 +1387,9 @@ dependencies = [
[[package]]
name = "mio"
version = "0.8.9"
version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09"
dependencies = [
"libc",
"log",
@ -1415,7 +1415,7 @@ checksum = "531c82a934da419bed3da09bd87d6e98c72f8d4aa755427b3b009c2b8b8c433c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1584,9 +1584,9 @@ dependencies = [
[[package]]
name = "once_cell"
version = "1.18.0"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "onig"
@ -1633,7 +1633,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1849,7 +1849,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1872,9 +1872,9 @@ checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
[[package]]
name = "portable-atomic"
version = "1.5.1"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3bccab0e7fd7cc19f820a1c8c91720af652d0c88dc9664dd72aef2614f04af3b"
checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
[[package]]
name = "powerfmt"
@ -1895,7 +1895,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
dependencies = [
"proc-macro2",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -1968,7 +1968,7 @@ dependencies = [
"prost 0.12.3",
"prost-types",
"regex",
"syn 2.0.39",
"syn 2.0.40",
"tempfile",
"which",
]
@ -1996,7 +1996,7 @@ dependencies = [
"itertools 0.11.0",
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -2245,9 +2245,9 @@ dependencies = [
[[package]]
name = "ring"
version = "0.17.6"
version = "0.17.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "684d5e6e18f669ccebf64a92236bb7db9a34f07be010e3627368182027180866"
checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74"
dependencies = [
"cc",
"getrandom",
@ -2278,7 +2278,7 @@ dependencies = [
"quote",
"rust-embed-utils",
"shellexpand",
"syn 2.0.39",
"syn 2.0.40",
"walkdir",
]
@ -2309,9 +2309,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.26"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags 2.4.1",
"errno",
@ -2334,12 +2334,12 @@ dependencies = [
[[package]]
name = "rustls"
version = "0.21.9"
version = "0.21.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9"
checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba"
dependencies = [
"log",
"ring 0.17.6",
"ring 0.17.7",
"rustls-webpki",
"sct",
]
@ -2359,7 +2359,7 @@ version = "0.101.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
dependencies = [
"ring 0.17.6",
"ring 0.17.7",
"untrusted 0.9.0",
]
@ -2371,9 +2371,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
[[package]]
name = "ryu"
version = "1.0.15"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
[[package]]
name = "same-file"
@ -2405,7 +2405,7 @@ version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
dependencies = [
"ring 0.17.6",
"ring 0.17.7",
"untrusted 0.9.0",
]
@ -2455,7 +2455,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -2649,7 +2649,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -2665,9 +2665,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.39"
version = "2.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
checksum = "13fa70a4ee923979ffb522cacce59d34421ebdea5625e1073c4326ef9d2dd42e"
dependencies = [
"proc-macro2",
"quote",
@ -2754,7 +2754,7 @@ dependencies = [
[[package]]
name = "text-generation-benchmark"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"average",
"clap",
@ -2775,7 +2775,7 @@ dependencies = [
[[package]]
name = "text-generation-client"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"futures",
"grpc-metadata",
@ -2791,7 +2791,7 @@ dependencies = [
[[package]]
name = "text-generation-launcher"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"clap",
"ctrlc",
@ -2807,7 +2807,7 @@ dependencies = [
[[package]]
name = "text-generation-router"
version = "1.2.0"
version = "1.3.0"
dependencies = [
"async-stream",
"axum",
@ -2857,7 +2857,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -2952,9 +2952,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.34.0"
version = "1.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9"
checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c"
dependencies = [
"backtrace",
"bytes",
@ -2987,7 +2987,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -3102,7 +3102,7 @@ dependencies = [
"proc-macro2",
"prost-build",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -3175,7 +3175,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -3272,9 +3272,9 @@ dependencies = [
[[package]]
name = "try-lock"
version = "0.2.4"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
[[package]]
name = "typenum"
@ -3293,9 +3293,9 @@ dependencies = [
[[package]]
name = "unicode-bidi"
version = "0.3.13"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416"
[[package]]
name = "unicode-ident"
@ -3362,7 +3362,7 @@ dependencies = [
"log",
"native-tls",
"once_cell",
"rustls 0.21.9",
"rustls 0.21.10",
"rustls-webpki",
"serde",
"serde_json",
@ -3415,7 +3415,7 @@ dependencies = [
"proc-macro2",
"quote",
"regex",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]
@ -3511,7 +3511,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
"wasm-bindgen-shared",
]
@ -3545,7 +3545,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -3572,7 +3572,7 @@ version = "0.22.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53"
dependencies = [
"ring 0.17.6",
"ring 0.17.7",
"untrusted 0.9.0",
]
@ -3835,22 +3835,22 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.7.28"
version = "0.7.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d6f15f7ade05d2a4935e34a457b936c23dc70a05cc1d97133dc99e7a3fe0f0e"
checksum = "306dca4455518f1f31635ec308b6b3e4eb1b11758cefafc782827d0aa7acb5c7"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.28"
version = "0.7.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbbad221e3f78500350ecbd7dfa4e63ef945c05f4c61cb7f4d3f84cd0bba649b"
checksum = "be912bf68235a88fbefd1b73415cb218405958d1655b2ece9035a19920bdf6ba"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.39",
"syn 2.0.40",
]
[[package]]

View File

@ -8,7 +8,7 @@ members = [
]
[workspace.package]
version = "1.2.0"
version = "1.3.0"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"

View File

@ -62,7 +62,7 @@ For a detailed starting guide, please see the [Quick Tour](https://huggingface.c
model=HuggingFaceH4/zephyr-7b-beta
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
```
And then you can make requests like
@ -76,7 +76,7 @@ curl 127.0.0.1:8080/generate \
**Note:** To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2-rocm --model-id $model` instead of the command above.
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3-rocm --model-id $model` instead of the command above.
To see all options to serve your models (in the [code](https://github.com/huggingface/text-generation-inference/blob/main/launcher/src/main.rs) or in the cli):
```
@ -106,7 +106,7 @@ model=meta-llama/Llama-2-7b-chat-hf
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
token=<your cli READ token>
docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
```
### A note on Shared Memory (shm)

View File

@ -10,7 +10,7 @@
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
"version": "1.2.0"
"version": "1.3.0"
},
"paths": {
"/": {

View File

@ -19,6 +19,6 @@ docker run --gpus all \
--shm-size 1g \
-e HUGGING_FACE_HUB_TOKEN=$token \
-p 8080:80 \
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 \
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 \
--model-id $model
```

View File

@ -8,7 +8,7 @@ Let's say you want to deploy [Falcon-7B Instruct](https://huggingface.co/tiiuae/
model=tiiuae/falcon-7b-instruct
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
```
<Tip warning={true}>
@ -20,7 +20,7 @@ To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://d
TGI also supports ROCm-enabled AMD GPUs (only MI210 and MI250 are tested), details are available in the [Supported Hardware section](./supported_models#supported-hardware) and [AMD documentation](https://rocm.docs.amd.com/en/latest/deploy/docker.html). To launch TGI on ROCm GPUs, please use instead:
```bash
docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2-rocm --model-id $model
docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3-rocm --model-id $model
```
Once TGI is running, you can use the `generate` endpoint by doing requests. To learn more about how to query the endpoints, check the [Consuming TGI](./basic_tutorials/consuming_tgi) section, where we show examples with utility libraries and UIs. Below you can see a simple snippet to query the endpoint.
@ -91,7 +91,7 @@ curl 127.0.0.1:8080/generate \
To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
```bash
docker run ghcr.io/huggingface/text-generation-inference:1.2 --help
docker run ghcr.io/huggingface/text-generation-inference:1.3 --help
```
</Tip>

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-integration-tests"
version = "1.2.0"
version = "1.3.0"
description = "Text Generation Inference integration tests"
authors = ["Nicolas Patry <nicolas@huggingface.co>"]

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-server"
version = "1.2.0"
version = "1.3.0"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]