v1.3.0
This commit is contained in:
parent
72ee382ded
commit
d0841cc8eb
|
@ -40,9 +40,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.4"
|
||||
version = "0.6.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ab91ebe16eb252986481c5b62f6098f3b698a45e34b5b98200cf20dd2484a44"
|
||||
checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
|
@ -69,9 +69,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.0.1"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a3a318f1f38d2418400f8209655bfd825785afd25aa30bb7ba6cc792e4596748"
|
||||
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
@ -128,7 +128,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -139,7 +139,7 @@ checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -362,7 +362,7 @@ dependencies = [
|
|||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -813,7 +813,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -994,9 +994,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "http-body"
|
||||
version = "0.4.5"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
|
||||
checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"http",
|
||||
|
@ -1174,9 +1174,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.9"
|
||||
version = "1.0.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38"
|
||||
checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
|
@ -1195,9 +1195,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.150"
|
||||
version = "0.2.151"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
|
||||
checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4"
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
|
@ -1336,7 +1336,7 @@ checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1387,9 +1387,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "0.8.9"
|
||||
version = "0.8.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3dce281c5e46beae905d4de1870d8b1509a9142b62eedf18b443b011ca8343d0"
|
||||
checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
|
@ -1415,7 +1415,7 @@ checksum = "531c82a934da419bed3da09bd87d6e98c72f8d4aa755427b3b009c2b8b8c433c"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1584,9 +1584,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
version = "1.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
|
||||
|
||||
[[package]]
|
||||
name = "onig"
|
||||
|
@ -1633,7 +1633,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1849,7 +1849,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1872,9 +1872,9 @@ checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
|||
|
||||
[[package]]
|
||||
name = "portable-atomic"
|
||||
version = "1.5.1"
|
||||
version = "1.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3bccab0e7fd7cc19f820a1c8c91720af652d0c88dc9664dd72aef2614f04af3b"
|
||||
checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
|
||||
|
||||
[[package]]
|
||||
name = "powerfmt"
|
||||
|
@ -1895,7 +1895,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -1968,7 +1968,7 @@ dependencies = [
|
|||
"prost 0.12.3",
|
||||
"prost-types",
|
||||
"regex",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
"tempfile",
|
||||
"which",
|
||||
]
|
||||
|
@ -1996,7 +1996,7 @@ dependencies = [
|
|||
"itertools 0.11.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2245,9 +2245,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ring"
|
||||
version = "0.17.6"
|
||||
version = "0.17.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "684d5e6e18f669ccebf64a92236bb7db9a34f07be010e3627368182027180866"
|
||||
checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"getrandom",
|
||||
|
@ -2278,7 +2278,7 @@ dependencies = [
|
|||
"quote",
|
||||
"rust-embed-utils",
|
||||
"shellexpand",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
|
@ -2309,9 +2309,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.26"
|
||||
version = "0.38.28"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a"
|
||||
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
|
||||
dependencies = [
|
||||
"bitflags 2.4.1",
|
||||
"errno",
|
||||
|
@ -2334,12 +2334,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.21.9"
|
||||
version = "0.21.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "629648aced5775d558af50b2b4c7b02983a04b312126d45eeead26e7caa498b9"
|
||||
checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba"
|
||||
dependencies = [
|
||||
"log",
|
||||
"ring 0.17.6",
|
||||
"ring 0.17.7",
|
||||
"rustls-webpki",
|
||||
"sct",
|
||||
]
|
||||
|
@ -2359,7 +2359,7 @@ version = "0.101.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
|
||||
dependencies = [
|
||||
"ring 0.17.6",
|
||||
"ring 0.17.7",
|
||||
"untrusted 0.9.0",
|
||||
]
|
||||
|
||||
|
@ -2371,9 +2371,9 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
|
|||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.15"
|
||||
version = "1.0.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741"
|
||||
checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
|
@ -2405,7 +2405,7 @@ version = "0.7.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
|
||||
dependencies = [
|
||||
"ring 0.17.6",
|
||||
"ring 0.17.7",
|
||||
"untrusted 0.9.0",
|
||||
]
|
||||
|
||||
|
@ -2455,7 +2455,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2649,7 +2649,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2665,9 +2665,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.39"
|
||||
version = "2.0.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a"
|
||||
checksum = "13fa70a4ee923979ffb522cacce59d34421ebdea5625e1073c4326ef9d2dd42e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
@ -2754,7 +2754,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "text-generation-benchmark"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"average",
|
||||
"clap",
|
||||
|
@ -2775,7 +2775,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "text-generation-client"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"futures",
|
||||
"grpc-metadata",
|
||||
|
@ -2791,7 +2791,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "text-generation-launcher"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"ctrlc",
|
||||
|
@ -2807,7 +2807,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "text-generation-router"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"axum",
|
||||
|
@ -2857,7 +2857,7 @@ checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -2952,9 +2952,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tokio"
|
||||
version = "1.34.0"
|
||||
version = "1.35.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0c014766411e834f7af5b8f4cf46257aab4036ca95e9d2c144a10f59ad6f5b9"
|
||||
checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c"
|
||||
dependencies = [
|
||||
"backtrace",
|
||||
"bytes",
|
||||
|
@ -2987,7 +2987,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3102,7 +3102,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"prost-build",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3175,7 +3175,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3272,9 +3272,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "try-lock"
|
||||
version = "0.2.4"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed"
|
||||
checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
|
@ -3293,9 +3293,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "unicode-bidi"
|
||||
version = "0.3.13"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460"
|
||||
checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
|
@ -3362,7 +3362,7 @@ dependencies = [
|
|||
"log",
|
||||
"native-tls",
|
||||
"once_cell",
|
||||
"rustls 0.21.9",
|
||||
"rustls 0.21.10",
|
||||
"rustls-webpki",
|
||||
"serde",
|
||||
"serde_json",
|
||||
|
@ -3415,7 +3415,7 @@ dependencies = [
|
|||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3511,7 +3511,7 @@ dependencies = [
|
|||
"once_cell",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
|
@ -3545,7 +3545,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
|
|||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
@ -3572,7 +3572,7 @@ version = "0.22.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53"
|
||||
dependencies = [
|
||||
"ring 0.17.6",
|
||||
"ring 0.17.7",
|
||||
"untrusted 0.9.0",
|
||||
]
|
||||
|
||||
|
@ -3835,22 +3835,22 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.28"
|
||||
version = "0.7.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7d6f15f7ade05d2a4935e34a457b936c23dc70a05cc1d97133dc99e7a3fe0f0e"
|
||||
checksum = "306dca4455518f1f31635ec308b6b3e4eb1b11758cefafc782827d0aa7acb5c7"
|
||||
dependencies = [
|
||||
"zerocopy-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy-derive"
|
||||
version = "0.7.28"
|
||||
version = "0.7.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbbad221e3f78500350ecbd7dfa4e63ef945c05f4c61cb7f4d3f84cd0bba649b"
|
||||
checksum = "be912bf68235a88fbefd1b73415cb218405958d1655b2ece9035a19920bdf6ba"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.39",
|
||||
"syn 2.0.40",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -8,7 +8,7 @@ members = [
|
|||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
edition = "2021"
|
||||
authors = ["Olivier Dehaene"]
|
||||
homepage = "https://github.com/huggingface/text-generation-inference"
|
||||
|
|
|
@ -62,7 +62,7 @@ For a detailed starting guide, please see the [Quick Tour](https://huggingface.c
|
|||
model=HuggingFaceH4/zephyr-7b-beta
|
||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
|
||||
```
|
||||
|
||||
And then you can make requests like
|
||||
|
@ -76,7 +76,7 @@ curl 127.0.0.1:8080/generate \
|
|||
|
||||
**Note:** To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 11.8 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
|
||||
|
||||
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2-rocm --model-id $model` instead of the command above.
|
||||
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3-rocm --model-id $model` instead of the command above.
|
||||
|
||||
To see all options to serve your models (in the [code](https://github.com/huggingface/text-generation-inference/blob/main/launcher/src/main.rs) or in the cli):
|
||||
```
|
||||
|
@ -106,7 +106,7 @@ model=meta-llama/Llama-2-7b-chat-hf
|
|||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||
token=<your cli READ token>
|
||||
|
||||
docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
|
||||
docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
|
||||
```
|
||||
|
||||
### A note on Shared Memory (shm)
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
"name": "Apache 2.0",
|
||||
"url": "https://www.apache.org/licenses/LICENSE-2.0"
|
||||
},
|
||||
"version": "1.2.0"
|
||||
"version": "1.3.0"
|
||||
},
|
||||
"paths": {
|
||||
"/": {
|
||||
|
|
|
@ -19,6 +19,6 @@ docker run --gpus all \
|
|||
--shm-size 1g \
|
||||
-e HUGGING_FACE_HUB_TOKEN=$token \
|
||||
-p 8080:80 \
|
||||
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 \
|
||||
-v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 \
|
||||
--model-id $model
|
||||
```
|
||||
|
|
|
@ -8,7 +8,7 @@ Let's say you want to deploy [Falcon-7B Instruct](https://huggingface.co/tiiuae/
|
|||
model=tiiuae/falcon-7b-instruct
|
||||
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
|
||||
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
|
||||
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3 --model-id $model
|
||||
```
|
||||
|
||||
<Tip warning={true}>
|
||||
|
@ -20,7 +20,7 @@ To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://d
|
|||
TGI also supports ROCm-enabled AMD GPUs (only MI210 and MI250 are tested), details are available in the [Supported Hardware section](./supported_models#supported-hardware) and [AMD documentation](https://rocm.docs.amd.com/en/latest/deploy/docker.html). To launch TGI on ROCm GPUs, please use instead:
|
||||
|
||||
```bash
|
||||
docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2-rocm --model-id $model
|
||||
docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.3-rocm --model-id $model
|
||||
```
|
||||
|
||||
Once TGI is running, you can use the `generate` endpoint by doing requests. To learn more about how to query the endpoints, check the [Consuming TGI](./basic_tutorials/consuming_tgi) section, where we show examples with utility libraries and UIs. Below you can see a simple snippet to query the endpoint.
|
||||
|
@ -91,7 +91,7 @@ curl 127.0.0.1:8080/generate \
|
|||
To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
|
||||
|
||||
```bash
|
||||
docker run ghcr.io/huggingface/text-generation-inference:1.2 --help
|
||||
docker run ghcr.io/huggingface/text-generation-inference:1.3 --help
|
||||
```
|
||||
|
||||
</Tip>
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "text-generation-integration-tests"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
description = "Text Generation Inference integration tests"
|
||||
authors = ["Nicolas Patry <nicolas@huggingface.co>"]
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "text-generation-server"
|
||||
version = "1.2.0"
|
||||
version = "1.3.0"
|
||||
description = "Text Generation Inference Python gRPC Server"
|
||||
authors = ["Olivier Dehaene <olivier@huggingface.co>"]
|
||||
|
||||
|
|
Loading…
Reference in New Issue