# Poetry packaging configuration for the text-generation-server package.
# Core package metadata read by Poetry.
[tool.poetry]
name = "text-generation-server"
version = "2.0.5-dev0"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

# Console script: the `text-generation-server` command invokes `app` from
# the `text_generation_server.cli` module.
[tool.poetry.scripts]
text-generation-server = 'text_generation_server.cli:app'

# Runtime dependencies. Entries marked `optional = true` are only installed
# when an extra that lists them (see [tool.poetry.extras]) is requested.
[tool.poetry.dependencies]
python = ">=3.9,<3.13"
protobuf = ">=4.25.3,<6"
grpcio = "^1.51.1"
grpcio-status = "^1.51.1"
grpcio-reflection = "^1.51.1"
grpc-interceptor = "^0.15.0"
typer = "^0.6.1"
accelerate = { version = "^0.29.1", optional = true }
bitsandbytes = { version = "^0.43.0", optional = true }
safetensors = "^0.4"
loguru = "^0.6.0"
opentelemetry-api = "^1.25.0"
opentelemetry-exporter-otlp = "^1.25.0"
opentelemetry-instrumentation-grpc = "^0.46b0"
hf-transfer = "^0.1.2"
sentencepiece = "^0.2"
tokenizers = "^0.20"
huggingface-hub = "^0.23"
transformers = "^4.45"
einops = "^0.6.1"
texttable = { version = "^1.6.7", optional = true }
datasets = { version = "^2.14.0", optional = true }
peft = { version = "^0.10", optional = true }
torch = { version = "^2.4.0", optional = true }
scipy = "^1.11.1"
pillow = "^10.0.0"
outlines = { version = "^0.1.1", optional = true }
prometheus-client = ">=0.20.0,<0.22"
py-cpuinfo = "^9.0.0"
compressed-tensors = { version = "^0.7.1", optional = true }
# Remove later, temporary workaround for outlines.
numpy = "^1.26"

# Prebuilt kernel wheels pinned by direct URL, one entry per supported CPython
# minor version (3.9 through 3.12). Per the wheel filename tags these are
# cu123 / torch2.4 / linux_x86_64 builds.
attention-kernels = [
    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
    { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
marlin-kernels = [
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
    { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
moe-kernels = [
    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
    { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
rich = "^13.7.1"

# Extras: each name maps to the optional dependencies it pulls in.
[tool.poetry.extras]
torch = ["torch"]
accelerate = ["accelerate"]
attention = ["attention-kernels"]
bnb = ["bitsandbytes"]
compressed-tensors = ["compressed-tensors"]
marlin = ["marlin-kernels"]
moe = ["moe-kernels"]
peft = ["peft"]
quantize = ["texttable", "datasets", "accelerate"]
outlines = ["outlines"]

# Development-only dependencies (Poetry `dev` group).
[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.51.1"
pytest = "^7.3.0"

# Additional package index for PyTorch wheels (`cu121` per the URL).
# `priority = "explicit"` means Poetry consults it only for dependencies that
# name it through `source = "pytorch-gpu-src"`.
# NOTE(review): no dependency visible in this file references this source;
# confirm it is still needed.
[[tool.poetry.source]]
name = "pytorch-gpu-src"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"

# Custom pytest markers registered for this project.
[tool.pytest.ini_options]
markers = [
    "private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')",
]

# PEP 517 build configuration: builds are delegated to poetry-core.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

# isort is configured with its "black" profile.
[tool.isort]
profile = "black"