# Nix package expression for the Python `text-generation-server`.
#
# This is a function from an attribute set of inputs (the source filter
# library, the Python builder, and the Python packages used below) to the
# result of calling `buildPythonPackage`.
#
# NOTE(review): `flash-attn-v1` and `torch` are accepted as arguments but are
# not listed in `dependencies` below — confirm whether that is intentional.
{
  nix-filter,
  buildPythonPackage,
  poetry-core,
  mypy-protobuf,
  awq-inference-engine,
  causal-conv1d,
  compressed-tensors,
  eetq,
  einops,
  exllamav2,
  flashinfer,
  flash-attn,
  flash-attn-layer-norm,
  flash-attn-rotary,
  flash-attn-v1,
  grpc-interceptor,
  grpcio-reflection,
  grpcio-status,
  grpcio-tools,
  hf-transfer,
  loguru,
  mamba-ssm,
  marlin-kernels,
  moe-kernels,
  opentelemetry-api,
  opentelemetry-exporter-otlp,
  opentelemetry-instrumentation-grpc,
  opentelemetry-semantic-conventions,
  peft,
  punica-kernels,
  safetensors,
  tokenizers,
  torch,
  sentencepiece,
  transformers,
  typer,
  vllm,
}:

let
  # Short alias for the nix-filter library; it is both called as a function
  # (to build `src`) and opened with `with` for its matcher helpers.
  filter = nix-filter.lib;
in
buildPythonPackage {
  name = "text-generation-server";

  # Restrict the source tree (rooted one directory above this file) to what
  # the build reads: directories, Python sources and stubs under `server`,
  # the server's pyproject.toml, and the `.proto` files under `proto/v3`.
  src = filter {
    root = ../.;
    include = with filter; [
      isDirectory
      (and (inDirectory "server") (or_ (matchExt "py") (matchExt "pyi")))
      "server/pyproject.toml"
      (and (inDirectory "proto/v3") (matchExt "proto"))
    ];
  };

  pyproject = true;

  build-system = [ poetry-core ];

  # mypy-protobuf supplies the plugin behind `--mypy_out` in `prePatch`.
  nativeBuildInputs = [ mypy-protobuf ];

  # Requirements named here are handled by the nixpkgs relax-deps hook
  # (presumably to drop the version bounds declared in pyproject.toml —
  # see the nixpkgs Python documentation).
  pythonRelaxDeps = [
    "einops"
    "huggingface-hub"
    "loguru"
    "opentelemetry-instrumentation-grpc"
    "sentencepiece"
    "typer"
  ];

  # `scipy` is removed from the declared requirements; it is not provided
  # in `dependencies` below.
  pythonRemoveDeps = [ "scipy" ];

  dependencies = [
    awq-inference-engine
    eetq
    causal-conv1d
    compressed-tensors
    einops
    exllamav2
    flashinfer
    flash-attn
    flash-attn-layer-norm
    flash-attn-rotary
    grpc-interceptor
    grpcio-reflection
    grpcio-status
    grpcio-tools
    hf-transfer
    loguru
    mamba-ssm
    marlin-kernels
    moe-kernels
    opentelemetry-api
    opentelemetry-exporter-otlp
    opentelemetry-instrumentation-grpc
    opentelemetry-semantic-conventions
    peft
    punica-kernels
    safetensors
    sentencepiece
    tokenizers
    transformers
    typer
    vllm
  ];

  # Steps, in order:
  #   1. Generate the protobuf, gRPC and mypy stub modules from
  #      proto/v3/generate.proto into server/text_generation_server/pb.
  #   2. Rewrite every line starting with `import ...pb2` in the generated
  #      .py files to `from . import ...pb2`, making the imports relative.
  #   3. Create pb/__init__.py so the directory is a Python package.
  #   4. Change into `server`, so later phases run from that directory.
  prePatch = ''
    python -m grpc_tools.protoc -Iproto/v3 --python_out=server/text_generation_server/pb \
      --grpc_python_out=server/text_generation_server/pb --mypy_out=server/text_generation_server/pb proto/v3/generate.proto
    find server/text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
    touch server/text_generation_server/pb/__init__.py
    cd server
  '';
}