v0.8.2

parent 95d3546976
commit e7248fe90e

@@ -2557,7 +2557,7 @@ dependencies = [

 [[package]]
 name = "text-generation-benchmark"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "average",
  "clap",

@@ -2577,7 +2577,7 @@ dependencies = [

 [[package]]
 name = "text-generation-client"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "futures",
  "grpc-metadata",

@@ -2593,7 +2593,7 @@ dependencies = [

 [[package]]
 name = "text-generation-launcher"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "clap",
  "ctrlc",

@@ -2609,7 +2609,7 @@ dependencies = [

 [[package]]
 name = "text-generation-router"
-version = "0.8.1"
+version = "0.8.2"
 dependencies = [
  "async-stream",
  "axum",

@@ -8,7 +8,7 @@ members = [
 ]

 [workspace.package]
-version = "0.8.1"
+version = "0.8.2"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"

@@ -10,7 +10,7 @@
       "name": "Apache 2.0",
       "url": "https://www.apache.org/licenses/LICENSE-2.0"
     },
-    "version": "0.8.1"
+    "version": "0.8.2"
   },
   "paths": {
     "/": {

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-server"
-version = "0.8.1"
+version = "0.8.2"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]

@@ -138,7 +138,9 @@ def get_model(
                 trust_remote_code=trust_remote_code,
             )

-    config_dict, _ = PretrainedConfig.get_config_dict(model_id, revision=revision, trust_remote_code=trust_remote_code)
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model_id, revision=revision, trust_remote_code=trust_remote_code
+    )
     model_type = config_dict["model_type"]

     if model_type == "gpt_bigcode":

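The hunk above only re-wraps the PretrainedConfig.get_config_dict call. For context, a minimal sketch of the pattern it touches, assuming the transformers library is installed; the model id below is illustrative and not taken from this commit:

# Sketch only: fetch just the config and branch on model_type, without loading weights.
# "bigcode/gpt_bigcode-santacoder" is an illustrative model id, not from this diff.
from transformers import PretrainedConfig

config_dict, _ = PretrainedConfig.get_config_dict(
    "bigcode/gpt_bigcode-santacoder", revision=None, trust_remote_code=False
)
model_type = config_dict["model_type"]

if model_type == "gpt_bigcode":
    print("would route to the santacoder / bigcode model class")
else:
    print(f"other architecture: {model_type}")
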
@@ -87,7 +87,9 @@ class FlashSantacoder(FlashCausalLM):
     ):
         for filename in filenames:
             with safe_open(
-                filename, framework="pt", device=str(device) if quantize is None else "cpu"
+                filename,
+                framework="pt",
+                device=str(device) if quantize is None else "cpu",
             ) as f:
                 for key in f.keys():
                     value = f.get_tensor(key)

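The safe_open call above is likewise only re-wrapped. As a rough, self-contained sketch of the safetensors API it relies on (the file path below is a placeholder), tensors are streamed one key at a time onto a chosen device:

# Sketch only: iterate over tensors in a safetensors checkpoint without
# materializing the whole file. "model.safetensors" is a placeholder path.
import torch
from safetensors import safe_open

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

with safe_open("model.safetensors", framework="pt", device=str(device)) as f:
    for key in f.keys():
        value = f.get_tensor(key)  # loads this tensor only
        print(key, tuple(value.shape), value.dtype)
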
@@ -148,11 +150,13 @@ class FlashSantacoder(FlashCausalLM):
                             module._parameters[param_name][: value.shape[0]] = value
                         elif "kv_attn.weight" in key:
                             module._parameters[param_name][
-                                model.transformer.head_size * model.transformer.num_heads :
+                                model.transformer.head_size
+                                * model.transformer.num_heads :
                             ] = value
                         elif "kv_attn.bias" in key:
                             module._parameters[param_name][
-                                model.transformer.head_size * model.transformer.num_heads :
+                                model.transformer.head_size
+                                * model.transformer.num_heads :
                             ] = value
                         else:
                             if current_parameter_tensor.shape != value.shape:
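
The kv_attn assignments above write into the tail of a fused attention parameter: the first head_size * num_heads rows hold the query projection, and the remaining rows hold the single shared key/value head used by gpt_bigcode-style multi-query attention. A small sketch of that layout, with made-up sizes:

# Illustrative only (made-up sizes): copy separate q and kv weights into one
# fused parameter using the same slicing as the loader above.
import torch

head_size, num_heads = 64, 4
hidden = head_size * num_heads                        # 256 query rows
fused = torch.empty(hidden + 2 * head_size, hidden)   # plus one shared k and v head

q_weight = torch.randn(hidden, hidden)
kv_weight = torch.randn(2 * head_size, hidden)

fused[: q_weight.shape[0]] = q_weight          # query slice at the front
fused[head_size * num_heads :] = kv_weight     # key/value slice at the tail
print(fused.shape)                             # torch.Size([384, 256])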