diff --git a/server/poetry.lock b/server/poetry.lock index ad7dab18..d03d03ae 100644 --- a/server/poetry.lock +++ b/server/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "accelerate" @@ -1924,13 +1924,13 @@ opentelemetry-api = "1.25.0" [[package]] name = "outlines" -version = "0.1.1" +version = "0.1.3" description = "Probabilistic Generative Model Programming" optional = true python-versions = ">=3.9" files = [ - {file = "outlines-0.1.1-py3-none-any.whl", hash = "sha256:896aee7f8f0472955104bb30fb118e525bced6885f09e833bb848782394f2c17"}, - {file = "outlines-0.1.1.tar.gz", hash = "sha256:9c5d3524ef21343bd681757e8ed9a5b1fcb335ee68f9b6b0889062ce23b561fc"}, + {file = "outlines-0.1.3-py3-none-any.whl", hash = "sha256:afcf6012b7cabbaae4a58975d03190c0bbc3d402b0b2a37538e05f335d73a247"}, + {file = "outlines-0.1.3.tar.gz", hash = "sha256:5a48ad00d3bdd8eccaa7574821eb5aaa27ab9f61fde9c3fba52f352dc00197e4"}, ] [package.dependencies] @@ -3986,6 +3986,7 @@ type = ["pytest-mypy"] [extras] accelerate = ["accelerate"] bnb = ["bitsandbytes"] +compressed-tensors = ["compressed-tensors"] marlin = ["marlin-kernels", "marlin-kernels", "marlin-kernels", "marlin-kernels"] moe = ["moe-kernels", "moe-kernels", "moe-kernels", "moe-kernels"] outlines = ["outlines"] @@ -3996,4 +3997,4 @@ torch = ["torch"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<3.13" -content-hash = "4636689efd4c94559c3c23903aafcffd177533a3b9006b3b4f8491b158a3a754" +content-hash = "5d1295a8becce2f65dc68d64f200acb5832de50fc0c37392f6f87bbc5b15d32a" diff --git a/server/requirements_cuda.txt b/server/requirements_cuda.txt index e3f6d20f..ad4ea56b 100644 --- a/server/requirements_cuda.txt +++ b/server/requirements_cuda.txt @@ -45,7 +45,7 @@ sentencepiece==0.2.0 ; python_version >= "3.9" and python_version < "3.13" setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13" tokenizers==0.20.1 ; python_version >= "3.9" and python_version < "3.13" tqdm==4.66.5 ; python_version >= "3.9" and python_version < "3.13" -transformers==4.45.2 ; python_version >= "3.9" and python_version < "3.13" +transformers==4.46.0 ; python_version >= "3.9" and python_version < "3.13" typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13" typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" urllib3==2.2.3 ; python_version >= "3.9" and python_version < "3.13" diff --git a/server/requirements_intel.txt b/server/requirements_intel.txt index e3f6d20f..ad4ea56b 100644 --- a/server/requirements_intel.txt +++ b/server/requirements_intel.txt @@ -45,7 +45,7 @@ sentencepiece==0.2.0 ; python_version >= "3.9" and python_version < "3.13" setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13" tokenizers==0.20.1 ; python_version >= "3.9" and python_version < "3.13" tqdm==4.66.5 ; python_version >= "3.9" and python_version < "3.13" -transformers==4.45.2 ; python_version >= "3.9" and python_version < "3.13" +transformers==4.46.0 ; python_version >= "3.9" and python_version < "3.13" typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13" typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" urllib3==2.2.3 ; python_version >= "3.9" and python_version < "3.13" diff --git a/server/requirements_rocm.txt b/server/requirements_rocm.txt index e3f6d20f..ad4ea56b 100644 --- a/server/requirements_rocm.txt +++ b/server/requirements_rocm.txt @@ -45,7 +45,7 @@ sentencepiece==0.2.0 ; python_version >= "3.9" and python_version < "3.13" setuptools==75.2.0 ; python_version >= "3.9" and python_version < "3.13" tokenizers==0.20.1 ; python_version >= "3.9" and python_version < "3.13" tqdm==4.66.5 ; python_version >= "3.9" and python_version < "3.13" -transformers==4.45.2 ; python_version >= "3.9" and python_version < "3.13" +transformers==4.46.0 ; python_version >= "3.9" and python_version < "3.13" typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13" typing-extensions==4.12.2 ; python_version >= "3.9" and python_version < "3.13" urllib3==2.2.3 ; python_version >= "3.9" and python_version < "3.13" diff --git a/server/text_generation_server/utils/logits_process.py b/server/text_generation_server/utils/logits_process.py index ec2813a1..d53f070c 100644 --- a/server/text_generation_server/utils/logits_process.py +++ b/server/text_generation_server/utils/logits_process.py @@ -501,7 +501,7 @@ class GrammarLogitProcessor(LogitsProcessor): allowed_tokens = self.fsm.get_next_instruction(fsm_grammar_state).tokens mask = torch.full_like(logits, -math.inf) if allowed_tokens is not None: - mask[:, allowed_tokens] = 0 + mask[:, allowed_tokens] = 0 biased_scores = logits + mask return biased_scores