This commit is contained in:
OlivierDehaene 2024-04-12 18:38:34 +02:00 committed by GitHub
parent 275caa04b1
commit c38a7d7ddd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 124 additions and 133 deletions

213
Cargo.lock generated
View File

@ -96,9 +96,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.81"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
[[package]]
name = "arbitrary"
@ -120,7 +120,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -159,18 +159,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
name = "async-trait"
version = "0.1.79"
version = "0.1.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681"
checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -375,15 +375,15 @@ dependencies = [
[[package]]
name = "built"
version = "0.7.1"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38d17f4d6e4dc36d1a02fbedc2753a096848e7c1b0772f7654eab8e2c927dd53"
checksum = "41bfbdb21256b87a8b5e80fab81a8eed158178e812fd7ba451907518b2742f16"
[[package]]
name = "bumpalo"
version = "3.15.4"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "bytecount"
@ -449,9 +449,9 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
[[package]]
name = "cc"
version = "1.0.90"
version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
checksum = "2678b2e3449475e95b0aa6f9b506a28e61b3dc8996592b983695e8ebb58a8b41"
dependencies = [
"jobserver",
"libc",
@ -459,9 +459,9 @@ dependencies = [
[[package]]
name = "cfg-expr"
version = "0.15.7"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa50868b64a9a6fda9d593ce778849ea8715cd2a3d2cc17ffdb4a2f2f2f1961d"
checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02"
dependencies = [
"smallvec",
"target-lexicon",
@ -498,7 +498,7 @@ dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim 0.11.0",
"strsim 0.11.1",
]
[[package]]
@ -510,7 +510,7 @@ dependencies = [
"heck 0.5.0",
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -812,9 +812,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "encoding_rs"
version = "0.8.33"
version = "0.8.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
dependencies = [
"cfg-if",
]
@ -1018,7 +1018,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -1072,9 +1072,9 @@ dependencies = [
[[package]]
name = "getrandom"
version = "0.2.12"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
dependencies = [
"cfg-if",
"js-sys",
@ -1111,9 +1111,9 @@ dependencies = [
[[package]]
name = "h2"
version = "0.3.25"
version = "0.3.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb"
checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8"
dependencies = [
"bytes",
"fnv",
@ -1198,15 +1198,6 @@ dependencies = [
"ureq",
]
[[package]]
name = "home"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "hostname"
version = "0.3.1"
@ -1432,7 +1423,7 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -1485,9 +1476,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "jobserver"
version = "0.1.28"
version = "0.1.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
checksum = "f08474e32172238f2827bd160c67871cdb2801430f65c3979184dc362e3ca118"
dependencies = [
"libc",
]
@ -1574,13 +1565,12 @@ checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058"
[[package]]
name = "libredox"
version = "0.0.1"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85c833ca1e66078851dba29046874e38f08b2c883700aa29a03ddd3b23814ee8"
checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
dependencies = [
"bitflags 2.5.0",
"libc",
"redox_syscall",
]
[[package]]
@ -1713,7 +1703,7 @@ checksum = "38b4faf00617defe497754acde3024865bc143d44a86799b24e191ecff91354f"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -1801,14 +1791,14 @@ checksum = "f686d68a09079e63b1d2c64aa305095887ce50565f00a922ebfaeeee0d9ba6ce"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
name = "multimap"
version = "0.8.3"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
[[package]]
name = "muxado"
@ -1991,7 +1981,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -2121,7 +2111,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -2337,14 +2327,14 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
name = "pin-project-lite"
version = "0.2.13"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02"
[[package]]
name = "pin-utils"
@ -2396,7 +2386,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7"
dependencies = [
"proc-macro2",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -2448,7 +2438,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd"
dependencies = [
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -2463,34 +2453,33 @@ dependencies = [
[[package]]
name = "prost"
version = "0.12.3"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a"
checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922"
dependencies = [
"bytes",
"prost-derive 0.12.3",
"prost-derive 0.12.4",
]
[[package]]
name = "prost-build"
version = "0.12.3"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2"
checksum = "80b776a1b2dc779f5ee0641f8ade0125bc1298dd41a9a0c16d8bd57b42d222b1"
dependencies = [
"bytes",
"heck 0.4.1",
"itertools 0.11.0",
"heck 0.5.0",
"itertools 0.12.1",
"log",
"multimap",
"once_cell",
"petgraph",
"prettyplease",
"prost 0.12.3",
"prost 0.12.4",
"prost-types",
"regex",
"syn 2.0.55",
"syn 2.0.58",
"tempfile",
"which",
]
[[package]]
@ -2508,24 +2497,24 @@ dependencies = [
[[package]]
name = "prost-derive"
version = "0.12.3"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e"
checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48"
dependencies = [
"anyhow",
"itertools 0.11.0",
"itertools 0.12.1",
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
name = "prost-types"
version = "0.12.3"
version = "0.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e"
checksum = "3235c33eb02c1f1e212abdbe34c78b264b038fb58ca612664343271e36e55ffe"
dependencies = [
"prost 0.12.3",
"prost 0.12.4",
]
[[package]]
@ -2561,9 +2550,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
[[package]]
name = "quote"
version = "1.0.35"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
@ -2716,9 +2705,9 @@ dependencies = [
[[package]]
name = "redox_users"
version = "0.4.4"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a18479200779601e498ada4e8c1e1f50e3ee19deb0259c25825a98b5603b2cb4"
checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891"
dependencies = [
"getrandom",
"libredox",
@ -2875,7 +2864,7 @@ dependencies = [
"quote",
"rust-embed-utils",
"shellexpand",
"syn 2.0.55",
"syn 2.0.58",
"walkdir",
]
@ -2971,9 +2960,9 @@ dependencies = [
[[package]]
name = "rustversion"
version = "1.0.14"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47"
[[package]]
name = "ryu"
@ -3017,9 +3006,9 @@ dependencies = [
[[package]]
name = "security-framework"
version = "2.9.2"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05b64fb303737d99b81884b2c63433e9ae28abebe5eb5045dcdd175dc2ecf4de"
checksum = "770452e37cad93e0a50d5abc3990d2bc351c36d0328f86cefec2f2fb206eaef6"
dependencies = [
"bitflags 1.3.2",
"core-foundation",
@ -3030,9 +3019,9 @@ dependencies = [
[[package]]
name = "security-framework-sys"
version = "2.9.1"
version = "2.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e932934257d3b408ed8f30db49d85ea163bfe74961f017f405b025af298f0c7a"
checksum = "41f3cc463c0ef97e11c3461a9d3787412d30e8e7eb907c79180c4a57bf7c04ef"
dependencies = [
"core-foundation-sys",
"libc",
@ -3064,7 +3053,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -3258,9 +3247,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strsim"
version = "0.11.0"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "strum"
@ -3281,7 +3270,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -3303,9 +3292,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.55"
version = "2.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687"
dependencies = [
"proc-macro2",
"quote",
@ -3320,9 +3309,9 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
[[package]]
name = "sysinfo"
version = "0.30.7"
version = "0.30.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c385888ef380a852a16209afc8cfad22795dd8873d69c9a14d2e2088f118d18"
checksum = "26d7c217777061d5a2d652aea771fb9ba98b6dade657204b08c4b9604d11555b"
dependencies = [
"cfg-if",
"core-foundation-sys",
@ -3410,7 +3399,7 @@ dependencies = [
[[package]]
name = "text-generation-benchmark"
version = "1.4.5"
version = "2.0.0"
dependencies = [
"average",
"clap",
@ -3431,11 +3420,11 @@ dependencies = [
[[package]]
name = "text-generation-client"
version = "1.4.5"
version = "2.0.0"
dependencies = [
"futures",
"grpc-metadata",
"prost 0.12.3",
"prost 0.12.4",
"prost-build",
"thiserror",
"tokio",
@ -3447,7 +3436,7 @@ dependencies = [
[[package]]
name = "text-generation-launcher"
version = "1.4.5"
version = "2.0.0"
dependencies = [
"clap",
"ctrlc",
@ -3465,7 +3454,7 @@ dependencies = [
[[package]]
name = "text-generation-router"
version = "1.4.5"
version = "2.0.0"
dependencies = [
"async-stream",
"axum",
@ -3522,7 +3511,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -3548,9 +3537,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.34"
version = "0.3.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
dependencies = [
"deranged",
"itoa",
@ -3571,9 +3560,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.17"
version = "0.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
dependencies = [
"num-conv",
"time-core",
@ -3699,7 +3688,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -3829,7 +3818,7 @@ dependencies = [
"hyper-timeout",
"percent-encoding",
"pin-project",
"prost 0.12.3",
"prost 0.12.4",
"tokio",
"tokio-stream",
"tower",
@ -3848,7 +3837,7 @@ dependencies = [
"proc-macro2",
"prost-build",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -3921,7 +3910,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -4162,7 +4151,7 @@ dependencies = [
"proc-macro2",
"quote",
"regex",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]
@ -4284,7 +4273,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
"wasm-bindgen-shared",
]
@ -4318,7 +4307,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -4364,18 +4353,6 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082"
[[package]]
name = "which"
version = "4.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
dependencies = [
"either",
"home",
"once_cell",
"rustix",
]
[[package]]
name = "winapi"
version = "0.3.9"
@ -4626,9 +4603,9 @@ checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
[[package]]
name = "winnow"
version = "0.6.5"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dffa400e67ed5a4dd237983829e66475f0a4a26938c4b04c21baede6262215b8"
checksum = "f0c976aaaa0e1f90dbb21e9587cdaf1d9679a1cde8875c0d6bd83ab96a208352"
dependencies = [
"memchr",
]
@ -4660,7 +4637,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.55",
"syn 2.0.58",
]
[[package]]

View File

@ -9,7 +9,7 @@ members = [
resolver = "2"
[workspace.package]
version = "1.4.5"
version = "2.0.0"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"

View File

@ -76,7 +76,7 @@ For a detailed starting guide, please see the [Quick Tour](https://huggingface.c
model=HuggingFaceH4/zephyr-7b-beta
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model
docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.0 --model-id $model
```
And then you can make requests like
@ -90,7 +90,7 @@ curl 127.0.0.1:8080/generate_stream \
**Note:** To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 12.2 or higher. For running the Docker container on a machine with no GPUs or CUDA support, it is enough to remove the `--gpus all` flag and add `--disable-custom-kernels`, please note CPU is not the intended platform for this project, so performance might be subpar.
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4-rocm --model-id $model` instead of the command above.
**Note:** TGI supports AMD Instinct MI210 and MI250 GPUs. Details can be found in the [Supported Hardware documentation](https://huggingface.co/docs/text-generation-inference/supported_models#supported-hardware). To use AMD GPUs, please use `docker run --device /dev/kfd --device /dev/dri --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.0-rocm --model-id $model` instead of the command above.
To see all options to serve your models (in the [code](https://github.com/huggingface/text-generation-inference/blob/main/launcher/src/main.rs) or in the cli):
```
@ -120,7 +120,7 @@ model=meta-llama/Llama-2-7b-chat-hf
volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
token=<your cli READ token>
docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model
docker run --gpus all --shm-size 1g -e HUGGING_FACE_HUB_TOKEN=$token -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:2.0 --model-id $model
```
### A note on Shared Memory (shm)

View File

@ -10,7 +10,7 @@
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
"version": "1.4.5"
"version": "2.0.0"
},
"paths": {
"/": {

View File

@ -17,7 +17,7 @@
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "1.4.5-native",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 100,
"prompt_tokens": 60,

View File

@ -31,7 +31,7 @@
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "1.4.5-native",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 316,

View File

@ -31,7 +31,7 @@
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "1.4.5-native",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 29,
"prompt_tokens": 316,

View File

@ -30,7 +30,7 @@
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "1.4.5-native",
"system_fingerprint": "2.0.0-native",
"usage": {
"completion_tokens": 21,
"prompt_tokens": 187,

View File

@ -23,5 +23,5 @@
"id": "",
"model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
"object": "text_completion",
"system_fingerprint": "1.4.5-native"
"system_fingerprint": "2.0.0-native"
}

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-integration-tests"
version = "1.4.5"
version = "2.0.0"
description = "Text Generation Inference integration tests"
authors = ["Nicolas Patry <nicolas@huggingface.co>"]

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-server"
version = "1.4.5"
version = "2.0.0"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

View File

@ -23,6 +23,10 @@ class ExceptionInterceptor(AsyncServerInterceptor):
method_name = method_name.split("/")[-1]
logger.exception(f"Method {method_name} encountered an error.")
# Runtime Error cannot be recovered from
if isinstance(err, RuntimeError):
exit(1)
if torch.cuda.is_available():
torch.cuda.empty_cache()

View File

@ -55,9 +55,10 @@ class CacheManager:
):
# Get free blocks indices by finding values in mask that are not set to 0
free_block_indices = self.free_block_mask.nonzero()
assert (
len(free_block_indices) >= blocks
), f"Out of available cache blocks: asked {blocks}, only {len(free_block_indices)} free blocks"
if blocks > len(free_block_indices):
raise RuntimeError(
f"Out of available cache blocks: asked {blocks}, only {len(free_block_indices)} free blocks"
)
# Slice by the number of required blocks
block_indices = free_block_indices[:blocks]

View File

@ -503,6 +503,10 @@ class MedusaHeadV1(nn.Module):
self, input: torch.Tensor
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
logits = self.lm_head(input)
# If we have too many tokens, we skip speculative logits
if input.shape[0] > 128:
return logits, None
speculative_logits = self.medusa(input)
return logits, speculative_logits
@ -549,6 +553,11 @@ class MedusaHeadV2(nn.Module):
self.lm_head = TensorParallelHead.load(config, prefix, weights)
def forward(self, x):
# If we have too many tokens, we skip speculative logits
if x.shape[0] > 128:
logits = self.lm_head(x)
return logits, None
size = x.shape[-1]
block_size = (size + self.world_size - 1) // self.world_size
start = self.rank * block_size