Update to moe-kernels 0.7.0 (#2720)

This version syncs with the vLLM kernels and brings some performance
improvements.
This commit is contained in:
Daniël de Kok 2024-11-19 14:55:29 +01:00 committed by GitHub
parent b4ec427ad0
commit 2007a9473a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 30 additions and 31 deletions

View File

@ -108,11 +108,11 @@
"pre-commit-hooks": "pre-commit-hooks_3"
},
"locked": {
"lastModified": 1723311214,
"narHash": "sha256-xdGZQBEa1AC2us/sY3igS/CucWY6jErXsAvCFRhB2LI=",
"lastModified": 1730277369,
"narHash": "sha256-yvQbeJbnnwCB68yv7uZXdGb+P7NMn5JMGBw0aBHymDI=",
"owner": "nix-community",
"repo": "crate2nix",
"rev": "236f6addfd452a48be805819e3216af79e988fd5",
"rev": "151122427d030874ebef3517cda766a6984e6ed6",
"type": "github"
},
"original": {
@ -581,11 +581,11 @@
},
"nix-filter": {
"locked": {
"lastModified": 1710156097,
"narHash": "sha256-1Wvk8UP7PXdf8bCCaEoMnOT1qe5/Duqgj+rL8sRQsSM=",
"lastModified": 1730207686,
"narHash": "sha256-SCHiL+1f7q9TAnxpasriP6fMarWE5H43t25F5/9e28I=",
"owner": "numtide",
"repo": "nix-filter",
"rev": "3342559a24e85fc164b295c3444e8a139924675b",
"rev": "776e68c1d014c3adde193a18db9d738458cd2ba4",
"type": "github"
},
"original": {
@ -853,11 +853,11 @@
]
},
"locked": {
"lastModified": 1729045942,
"narHash": "sha256-HjmK0x5Zm2TK2vFpC7XBM2e3EDNVnAIuEoU2FkeN8xw=",
"lastModified": 1730687492,
"narHash": "sha256-xQVadjquBA/tFxDt5A55LJ1D1AvkVWsnrKC2o+pr8F4=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "9de3cea452d2401d6f93c06ad985178a4e11d1fc",
"rev": "41814763a2c597755b0755dbe3e721367a5e420f",
"type": "github"
},
"original": {
@ -978,16 +978,15 @@
"nixpkgs": "nixpkgs_6"
},
"locked": {
"lastModified": 1731923801,
"narHash": "sha256-SVtXtTGgnKjwPwMLe030l/DVhcm1vH4fXM7tUAPYOZc=",
"lastModified": 1732005645,
"narHash": "sha256-WbmABjHuixrYrGtiTc7cyj/EA8qta/FjRvmlU3JvKKQ=",
"owner": "huggingface",
"repo": "text-generation-inference-nix",
"rev": "b87d4b5bede0ffed7da50e9a5246b133c7d618dc",
"rev": "93a6aa5c029d893226880d313d24237a379b18c7",
"type": "github"
},
"original": {
"owner": "huggingface",
"ref": "marlin-kernels-0.3.5",
"repo": "text-generation-inference-nix",
"type": "github"
}

View File

@ -5,7 +5,7 @@
inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
};
nix-filter.url = "github:numtide/nix-filter";
tgi-nix.url = "github:huggingface/text-generation-inference-nix/marlin-kernels-0.3.5";
tgi-nix.url = "github:huggingface/text-generation-inference-nix";
nixpkgs.follows = "tgi-nix/nixpkgs";
flake-utils.url = "github:numtide/flake-utils";
rust-overlay = {

26
server/poetry.lock generated
View File

@ -1367,12 +1367,12 @@ files = [
[[package]]
name = "moe-kernels"
version = "0.6.0"
version = "0.7.0"
description = "MoE kernels"
optional = true
python-versions = ">=3.7"
files = [
{file = "moe_kernels-0.6.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:f28fd2a56c3ac7bfe74bc44cc7c8c0791a2644ad689b084ea4ed6decb7f41c25"},
{file = "moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", hash = "sha256:f8c126395f11522881c6bf1f6120e3670822006a84e2ff74af561c22445746b3"},
]
[package.dependencies]
@ -1382,16 +1382,16 @@ triton = "*"
[package.source]
type = "url"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl"
[[package]]
name = "moe-kernels"
version = "0.6.0"
version = "0.7.0"
description = "MoE kernels"
optional = true
python-versions = ">=3.7"
files = [
{file = "moe_kernels-0.6.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:db475948fd9f7a8647aa3f73256ff4d3bb111425305bcd0b0d3559ccc75b8937"},
{file = "moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", hash = "sha256:2afff8346251f01d5d90bab738e3dfaa6b14a414a9c88205d396ab2bae87983a"},
]
[package.dependencies]
@ -1401,16 +1401,16 @@ triton = "*"
[package.source]
type = "url"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl"
[[package]]
name = "moe-kernels"
version = "0.6.0"
version = "0.7.0"
description = "MoE kernels"
optional = true
python-versions = ">=3.7"
files = [
{file = "moe_kernels-0.6.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:364be07c06aafbab1f51d9e26d9a4ff658defe1462a4c645abaf7b895ed163a8"},
{file = "moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", hash = "sha256:b1a29e33d3b7d85e2b4f8bd47db28211096d1f645e0868d5a1f3666ebb9bd9e3"},
]
[package.dependencies]
@ -1420,16 +1420,16 @@ triton = "*"
[package.source]
type = "url"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl"
[[package]]
name = "moe-kernels"
version = "0.6.0"
version = "0.7.0"
description = "MoE kernels"
optional = true
python-versions = ">=3.7"
files = [
{file = "moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:81e7fa25fb5ed5336f5151994f5e3f600df7e166fe013576968c59415e442894"},
{file = "moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", hash = "sha256:9573611174cda9f6fafa1816521e38582fd2903b321bbaf78f83cf6e3189ac7d"},
]
[package.dependencies]
@ -1439,7 +1439,7 @@ triton = "*"
[package.source]
type = "url"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl"
url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl"
[[package]]
name = "mpmath"
@ -4066,4 +4066,4 @@ torch = ["torch"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<3.13"
content-hash = "b889115cee7f1969856f233e74721965f692e40d2a1c2fceccaf6b3bdb19680d"
content-hash = "7082f1983403ff58a1f0304e8bbf1197715b5156ddeea0f3e8287334d52c2617"

View File

@ -54,10 +54,10 @@ marlin-kernels = [
{ url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.5/marlin_kernels-0.3.5+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
moe-kernels = [
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.6.0/moe_kernels-0.6.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
{ url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
rich = "^13.7.1"