This commit is contained in:
OlivierDehaene 2023-12-22 15:46:04 +01:00
parent 529d7c2591
commit 630800eed3
8 changed files with 86 additions and 98 deletions

161
Cargo.lock generated
View File

@ -88,9 +88,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.75"
version = "1.0.76"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
checksum = "59d2a3357dde987206219e78ecfbbb6e8dad06cbb65292758d3270e6254f7355"
[[package]]
name = "arc-swap"
@ -128,18 +128,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
name = "async-trait"
version = "0.1.74"
version = "0.1.75"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a66537f1bb974b254c98ed142ff995236e81b9d0fe4db0575f46612cb15eb0f9"
checksum = "fdf6721fb0140e4f897002dd086c06f6c27775df19cfe1fccb21181a48fd2c98"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -362,7 +362,7 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -426,9 +426,9 @@ dependencies = [
[[package]]
name = "crossbeam-channel"
version = "0.5.8"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
checksum = "14c3242926edf34aec4ac3a77108ad4854bffaa2e4ddc1824124ce59231302d5"
dependencies = [
"cfg-if",
"crossbeam-utils",
@ -436,9 +436,9 @@ dependencies = [
[[package]]
name = "crossbeam-deque"
version = "0.8.3"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef"
checksum = "fca89a0e215bab21874660c67903c5f143333cab1da83d041c7ded6053774751"
dependencies = [
"cfg-if",
"crossbeam-epoch",
@ -447,22 +447,21 @@ dependencies = [
[[package]]
name = "crossbeam-epoch"
version = "0.9.15"
version = "0.9.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7"
checksum = "2d2fe95351b870527a5d09bf563ed3c97c0cffb87cf1c78a591bf48bb218d9aa"
dependencies = [
"autocfg",
"cfg-if",
"crossbeam-utils",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.16"
version = "0.8.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
checksum = "c06d96137f14f244c37f989d9fff8f95e6c18b918e71f36638f8c49112e4c78f"
dependencies = [
"cfg-if",
]
@ -813,7 +812,7 @@ checksum = "53b153fd91e4b0147f4aced87be237c98248656bb01050b96bf3ee89220a8ddb"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -963,11 +962,11 @@ dependencies = [
[[package]]
name = "home"
version = "0.5.5"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb"
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.52.0",
]
[[package]]
@ -1023,9 +1022,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "hyper"
version = "0.14.27"
version = "0.14.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468"
checksum = "bf96e135eb83a2a8ddf766e426a841d8ddd7449d5f00d34ea02b41d2f19eef80"
dependencies = [
"bytes",
"futures-channel",
@ -1038,7 +1037,7 @@ dependencies = [
"httpdate",
"itoa",
"pin-project-lite",
"socket2 0.4.10",
"socket2",
"tokio",
"tower-service",
"tracing",
@ -1240,9 +1239,9 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "mach2"
version = "0.4.1"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d0d1830bcd151a6fc4aea1369af235b36c1528fe976b8ff678683c9995eade8"
checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709"
dependencies = [
"libc",
]
@ -1312,9 +1311,9 @@ dependencies = [
[[package]]
name = "metrics-exporter-prometheus"
version = "0.12.1"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a4964177ddfdab1e3a2b37aec7cf320e14169abb0ed73999f558136409178d5"
checksum = "1d4fa7ce7c4862db464a37b0b31d89bca874562f034bd7993895572783d02950"
dependencies = [
"base64 0.21.5",
"hyper",
@ -1336,7 +1335,7 @@ checksum = "ddece26afd34c31585c74a4db0630c376df271c285d682d1e55012197830b6df"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -1415,7 +1414,7 @@ checksum = "531c82a934da419bed3da09bd87d6e98c72f8d4aa755427b3b009c2b8b8c433c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -1612,9 +1611,9 @@ dependencies = [
[[package]]
name = "openssl"
version = "0.10.61"
version = "0.10.62"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b8419dc8cc6d866deb801274bba2e6f8f6108c1bb7fcc10ee5ab864931dbb45"
checksum = "8cde4d2d9200ad5909f8dac647e29482e07c3a35de8a13fce7c9c7747ad9f671"
dependencies = [
"bitflags 2.4.1",
"cfg-if",
@ -1633,7 +1632,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -1644,9 +1643,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-sys"
version = "0.9.97"
version = "0.9.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c3eaad34cdd97d81de97964fc7f29e2d104f483840d906ef56daa1912338460b"
checksum = "c1665caf8ab2dc9aef43d1c0023bd904633a6a05cb30b0ad59bec2ae986e57a7"
dependencies = [
"cc",
"libc",
@ -1849,7 +1848,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -1866,9 +1865,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
[[package]]
name = "pkg-config"
version = "0.3.27"
version = "0.3.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a"
[[package]]
name = "portable-atomic"
@ -1895,7 +1894,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
dependencies = [
"proc-macro2",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -1924,9 +1923,9 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.70"
version = "1.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8"
dependencies = [
"unicode-ident",
]
@ -1968,7 +1967,7 @@ dependencies = [
"prost 0.12.3",
"prost-types",
"regex",
"syn 2.0.40",
"syn 2.0.42",
"tempfile",
"which",
]
@ -1996,7 +1995,7 @@ dependencies = [
"itertools 0.11.0",
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -2192,9 +2191,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
[[package]]
name = "reqwest"
version = "0.11.22"
version = "0.11.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "046cd98826c46c2ac8ddecae268eb5c2e58628688a5fc7a2643704a73faba95b"
checksum = "37b1ae8d9ac08420c66222fb9096fc5de435c3c48542bc5336c51892cffafb41"
dependencies = [
"base64 0.21.5",
"bytes",
@ -2278,7 +2277,7 @@ dependencies = [
"quote",
"rust-embed-utils",
"shellexpand",
"syn 2.0.40",
"syn 2.0.42",
"walkdir",
]
@ -2455,7 +2454,7 @@ checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -2580,16 +2579,6 @@ version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970"
[[package]]
name = "socket2"
version = "0.4.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f7916fc008ca5542385b89a3d3ce689953c143e9304a9bf8beec1de48994c0d"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "socket2"
version = "0.5.5"
@ -2649,7 +2638,7 @@ dependencies = [
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -2665,9 +2654,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.40"
version = "2.0.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13fa70a4ee923979ffb522cacce59d34421ebdea5625e1073c4326ef9d2dd42e"
checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8"
dependencies = [
"proc-macro2",
"quote",
@ -2754,7 +2743,7 @@ dependencies = [
[[package]]
name = "text-generation-benchmark"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"average",
"clap",
@ -2775,7 +2764,7 @@ dependencies = [
[[package]]
name = "text-generation-client"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"futures",
"grpc-metadata",
@ -2791,7 +2780,7 @@ dependencies = [
[[package]]
name = "text-generation-launcher"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"clap",
"ctrlc",
@ -2807,7 +2796,7 @@ dependencies = [
[[package]]
name = "text-generation-router"
version = "1.3.3"
version = "1.3.4"
dependencies = [
"async-stream",
"axum",
@ -2842,22 +2831,22 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.50"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2"
checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.50"
version = "1.0.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8"
checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -2872,9 +2861,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.30"
version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5"
checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e"
dependencies = [
"deranged",
"itoa",
@ -2894,9 +2883,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.15"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20"
checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f"
dependencies = [
"time-core",
]
@ -2952,9 +2941,9 @@ dependencies = [
[[package]]
name = "tokio"
version = "1.35.0"
version = "1.35.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "841d45b238a16291a4e1584e61820b8ae57d696cc5015c459c229ccc6990cc1c"
checksum = "c89b4efa943be685f629b149f53829423f8f5531ea21249408e8e2f8671ec104"
dependencies = [
"backtrace",
"bytes",
@ -2964,7 +2953,7 @@ dependencies = [
"parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2 0.5.5",
"socket2",
"tokio-macros",
"windows-sys 0.48.0",
]
@ -2987,7 +2976,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -3102,7 +3091,7 @@ dependencies = [
"proc-macro2",
"prost-build",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -3175,7 +3164,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -3415,7 +3404,7 @@ dependencies = [
"proc-macro2",
"quote",
"regex",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]
@ -3511,7 +3500,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
"wasm-bindgen-shared",
]
@ -3545,7 +3534,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -3835,22 +3824,22 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.7.30"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "306dca4455518f1f31635ec308b6b3e4eb1b11758cefafc782827d0aa7acb5c7"
checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
version = "0.7.30"
version = "0.7.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be912bf68235a88fbefd1b73415cb218405958d1655b2ece9035a19920bdf6ba"
checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.40",
"syn 2.0.42",
]
[[package]]

View File

@ -8,7 +8,7 @@ members = [
]
[workspace.package]
version = "1.3.3"
version = "1.3.4"
edition = "2021"
authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"

View File

@ -10,7 +10,7 @@
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
"version": "1.3.3"
"version": "1.3.4"
},
"paths": {
"/": {

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-integration-tests"
version = "1.3.3"
version = "1.3.4"
description = "Text Generation Inference integration tests"
authors = ["Nicolas Patry <nicolas@huggingface.co>"]

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "text-generation-server"
version = "1.3.3"
version = "1.3.4"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

View File

@ -39,7 +39,7 @@ if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
V2 = False
log_once(
logger.warning,
"Disabling exllama v2 and using v1 instead because there are issues when sharding"
"Disabling exllama v2 and using v1 instead because there are issues when sharding",
)
if os.getenv("DISABLE_EXLLAMA") == "True":

View File

@ -215,7 +215,9 @@ class Weights:
bits, groupsize, desc_act = self._get_gptq_params()
from text_generation_server.utils.layers import HAS_EXLLAMA
use_exllama = bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
use_exllama = (
bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
)
weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
else:
w = [self.get_sharded(f"{p}.weight", dim=0) for p in prefixes]
@ -281,14 +283,11 @@ class Weights:
if CAN_EXLLAMA:
log_once(
logger.warning,
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True"
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True",
)
use_exllama = False
else:
log_once(
logger.info,
f"Using exllama kernels v{HAS_EXLLAMA}"
)
log_once(logger.info, f"Using exllama kernels v{HAS_EXLLAMA}")
g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0)