2024-08-09 04:32:37 -06:00
|
|
|
{
|
|
|
|
# Flake inputs. Every input that carries its own nixpkgs is redirected to the
# nixpkgs pinned by tgi-nix, so the flake evaluates against one package set.
inputs = {
  # Source of the shared nixpkgs pin (and of the config/overlay used in outputs).
  tgi-nix.url = "github:huggingface/text-generation-inference-nix/marlin-kernels-0.3.5";
  nixpkgs.follows = "tgi-nix/nixpkgs";

  flake-utils.url = "github:numtide/flake-utils";
  nix-filter.url = "github:numtide/nix-filter";

  crate2nix.url = "github:nix-community/crate2nix";
  crate2nix.inputs.nixpkgs.follows = "tgi-nix/nixpkgs";

  rust-overlay.url = "github:oxalica/rust-overlay";
  rust-overlay.inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
};
|
|
|
|
outputs =
|
|
|
|
{
|
|
|
|
self,
|
2024-08-13 02:44:15 -06:00
|
|
|
crate2nix,
|
2024-08-16 02:01:01 -06:00
|
|
|
nix-filter,
|
2024-08-09 04:32:37 -06:00
|
|
|
nixpkgs,
|
|
|
|
flake-utils,
|
2024-08-09 07:24:21 -06:00
|
|
|
rust-overlay,
|
2024-08-09 04:32:37 -06:00
|
|
|
tgi-nix,
|
|
|
|
}:
|
|
|
|
flake-utils.lib.eachDefaultSystem (
|
|
|
|
system:
|
|
|
|
let
|
2024-08-13 02:44:15 -06:00
|
|
|
# Result of crate2nix's `appliedCargoNix` tool for the current system, run over
# this repository; the Rust packages below are taken from its
# `workspaceMembers`.
cargoNix = crate2nix.tools.${system}.appliedCargoNix {
  src = ./.;
  name = "tgi";
  # NOTE(review): presumably forwarded to the Cargo.nix generator so that all
  # cargo features are resolved — confirm against the crate2nix documentation.
  additionalCargoNixArgs = [ "--all-features" ];
};
|
2024-08-09 04:32:37 -06:00
|
|
|
# Package set for `system`: the nixpkgs input imported with the config exported
# by tgi-nix and with three overlays, listed in the order they are passed.
pkgs = import nixpkgs {
  inherit system;
  inherit (tgi-nix.lib) config;
  overlays = [
    rust-overlay.overlays.default
    tgi-nix.overlays.default
    # Repository-local overlay. Written with an explicit leading `./`, matching
    # the other local paths in this file (e.g. ./nix/crate-overrides.nix).
    (import ./nix/overlay.nix)
  ];
};
|
2024-08-16 02:01:01 -06:00
|
|
|
# Crate override set handed to every workspace-member build in this file.
crateOverrides = import ./nix/crate-overrides.nix {
  inherit nix-filter pkgs;
};
|
2024-08-20 23:48:13 -06:00
|
|
|
# Build the named crate2nix workspace member with the shared crate overrides.
buildWorkspaceMember =
  member:
  cargoNix.workspaceMembers.${member}.build.override {
    inherit crateOverrides;
  };
benchmark = buildWorkspaceMember "text-generation-benchmark";
launcher = buildWorkspaceMember "text-generation-launcher";
|
2024-09-12 02:44:01 -06:00
|
|
|
# Router executable: the `text-generation-router-v3` workspace member wrapped
# in a shell script that sets PYTHONPATH to the Python packages listed below
# before starting the real binary.
router =
  let
    routerUnwrapped = cargoNix.workspaceMembers.text-generation-router-v3.build.override {
      inherit crateOverrides;
    };
    # Search path made from the listed Python packages.
    packagePath =
      with pkgs.python3.pkgs;
      makePythonPath [
        protobuf
        sentencepiece
        torch
        transformers
      ];
  in
  pkgs.writeShellApplication {
    name = "text-generation-router";
    # `exec` replaces the wrapper shell with the router process. Without it the
    # router runs as a child of the shell, so a signal sent to the wrapper's PID
    # is not delivered to the router itself.
    text = ''
      export PYTHONPATH="${packagePath}"
      exec ${routerUnwrapped}/bin/text-generation-router "$@"
    '';
  };
|
2024-08-20 14:07:33 -06:00
|
|
|
# Python server package, instantiated from ./nix/server.nix through the
# python3 package set's callPackage.
server = pkgs.python3.pkgs.callPackage ./nix/server.nix {
  inherit nix-filter;
};
|
2024-09-19 12:50:37 -06:00
|
|
|
# Python client package, instantiated from ./nix/client.nix through the
# python3 package set's callPackage with no explicit arguments.
client =
  pkgs.python3.pkgs.callPackage ./nix/client.nix
    { };
|
2024-08-09 04:32:37 -06:00
|
|
|
in
|
|
|
|
{
|
2024-09-17 04:14:30 -06:00
|
|
|
# Flake checks. `rust` is a buildRustPackage derivation whose build phase is
# `cargo check`, whose check phase is formatting + tests + clippy, and whose
# install phase only creates an empty output.
checks = {
  rust = pkgs.rustPlatform.buildRustPackage {
    name = "rust-checks";
    src = ./.;
    cargoLock.lockFile = ./Cargo.lock;

    nativeBuildInputs = with pkgs; [
      clippy
      pkg-config
      protobuf
      python3
      rustfmt
    ];
    buildInputs = [ pkgs.openssl.dev ];

    buildPhase = ''
      cargo check
    '';
    checkPhase = ''
      cargo fmt -- --check
      cargo test -j $NIX_BUILD_CORES
      cargo clippy
    '';
    # Nothing is installed; the output path just has to exist.
    installPhase = "touch $out";
  };
};
|
2024-09-12 06:54:56 -06:00
|
|
|
# Package exposed as this flake's per-system `formatter` output.
formatter = pkgs.nixfmt-rfc-style;
|
2024-08-20 14:07:33 -06:00
|
|
|
# Development shells. `with pkgs` brings nixpkgs attributes into scope; the
# let-bound packages of this flake (benchmark, launcher, router, server,
# client) are lexical bindings and therefore take precedence over it.
devShells =
  with pkgs;
  let
    # Locally built components provided by both the pure and the test shell.
    tgiComponents = [
      benchmark
      launcher
      router
      server
    ];
    pureShell = mkShell { buildInputs = tgiComponents; };
    # Constructor shared by the three impure shell variants.
    impureShell = callPackage ./nix/impure-shell.nix;
  in
  {
    # `default` and `pure` are the same shell.
    default = pureShell;
    pure = pureShell;

    # Pure shell contents plus the client, the Rust toolchain pieces and the
    # Python tooling listed below.
    test = mkShell {
      buildInputs =
        tgiComponents
        ++ [
          client
          openssl.dev
          pkg-config
          cargo
          rustfmt
          clippy
        ]
        ++ (with python3.pkgs; [
          docker
          pytest
          pytest-asyncio
          syrupy
          pre-commit
          ruff
        ]);
    };

    impure = impureShell { inherit server; };

    impureWithCuda = impureShell {
      inherit server;
      withCuda = true;
    };

    # Same impure shell, with the server's flash-attn swapped for flash-attn-v1.
    impure-flash-attn-v1 = impureShell {
      server = server.override { flash-attn = python3.pkgs.flash-attn-v1; };
    };
  };
|
2024-08-23 14:06:22 -06:00
|
|
|
|
2024-10-01 10:02:06 -06:00
|
|
|
# Packages exported per system: the launcher wrapper (`default`) and two
# outputs built from ./nix/docker.nix around that wrapper.
packages = rec {
  # Shell wrapper that starts the launcher with `server` and `router` listed as
  # runtime inputs of the script.
  default = pkgs.writeShellApplication {
    name = "text-generation-inference";
    runtimeInputs = [
      server
      router
    ];
    # `exec` replaces the wrapper shell with the launcher, so the launcher keeps
    # the wrapper's PID and receives signals sent to it directly instead of
    # running as a child of an intermediate shell.
    text = ''
      exec ${launcher}/bin/text-generation-launcher "$@"
    '';
  };

  # Local paths use an explicit leading `./`, matching the rest of this file.
  dockerImage = pkgs.callPackage ./nix/docker.nix {
    text-generation-inference = default;
  };

  # Same expression with `stream = true`.
  # NOTE(review): presumably selects a streamed image build in docker.nix — confirm there.
  dockerImageStreamed = pkgs.callPackage ./nix/docker.nix {
    text-generation-inference = default;
    stream = true;
  };
};
|
2024-08-09 04:32:37 -06:00
|
|
|
}
|
|
|
|
);
|
|
|
|
}
|