All integration tests back everywhere (too many failed CI). (#2428)
* All integration tests back everywhere (too many failed CI). * Upgrade integration tests after 12.4 * Attempt to remove the specified compute cap. * Common arch list. * Punica uses raw ASM which is not valid on 9.0 apparently.
This commit is contained in:
parent
53729b74ac
commit
e4201f44cf
|
@ -172,7 +172,7 @@ jobs:
|
||||||
group: ${{ needs.build-and-push.outputs.runs_on }}
|
group: ${{ needs.build-and-push.outputs.runs_on }}
|
||||||
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
||||||
env:
|
env:
|
||||||
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '' }}
|
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
|
@ -88,6 +88,7 @@ RUN case ${TARGETPLATFORM} in \
|
||||||
FROM pytorch-install AS kernel-builder
|
FROM pytorch-install AS kernel-builder
|
||||||
|
|
||||||
ARG MAX_JOBS=8
|
ARG MAX_JOBS=8
|
||||||
|
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0+PTX"
|
||||||
|
|
||||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||||
ninja-build cmake \
|
ninja-build cmake \
|
||||||
|
@ -118,7 +119,7 @@ FROM kernel-builder AS exllama-kernels-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/exllama_kernels/ .
|
COPY server/exllama_kernels/ .
|
||||||
|
|
||||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
|
RUN python setup.py build
|
||||||
|
|
||||||
# Build Transformers exllama kernels
|
# Build Transformers exllama kernels
|
||||||
FROM kernel-builder AS exllamav2-kernels-builder
|
FROM kernel-builder AS exllamav2-kernels-builder
|
||||||
|
@ -126,21 +127,21 @@ WORKDIR /usr/src
|
||||||
COPY server/Makefile-exllamav2/ Makefile
|
COPY server/Makefile-exllamav2/ Makefile
|
||||||
|
|
||||||
# Build specific version of transformers
|
# Build specific version of transformers
|
||||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-exllamav2
|
RUN make build-exllamav2
|
||||||
|
|
||||||
# Build Transformers awq kernels
|
# Build Transformers awq kernels
|
||||||
FROM kernel-builder AS awq-kernels-builder
|
FROM kernel-builder AS awq-kernels-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/Makefile-awq Makefile
|
COPY server/Makefile-awq Makefile
|
||||||
# Build specific version of transformers
|
# Build specific version of transformers
|
||||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
|
RUN make build-awq
|
||||||
|
|
||||||
# Build eetq kernels
|
# Build eetq kernels
|
||||||
FROM kernel-builder AS eetq-kernels-builder
|
FROM kernel-builder AS eetq-kernels-builder
|
||||||
WORKDIR /usr/src
|
WORKDIR /usr/src
|
||||||
COPY server/Makefile-eetq Makefile
|
COPY server/Makefile-eetq Makefile
|
||||||
# Build specific version of transformers
|
# Build specific version of transformers
|
||||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
|
RUN make build-eetq
|
||||||
|
|
||||||
# Build Lorax Punica kernels
|
# Build Lorax Punica kernels
|
||||||
FROM kernel-builder AS lorax-punica-builder
|
FROM kernel-builder AS lorax-punica-builder
|
||||||
|
|
|
@ -11,52 +11,52 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 49833,
|
"id": 49833,
|
||||||
"logprob": -10.546875,
|
"logprob": -10.5703125,
|
||||||
"text": " dég"
|
"text": " dég"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 21543,
|
"id": 21543,
|
||||||
"logprob": -0.18457031,
|
"logprob": -0.14746094,
|
||||||
"text": "uster"
|
"text": "uster"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 447,
|
"id": 447,
|
||||||
"logprob": -1.9287109,
|
"logprob": -1.9277344,
|
||||||
"text": " un"
|
"text": " un"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 46341,
|
"id": 46341,
|
||||||
"logprob": -15.4296875,
|
"logprob": -15.421875,
|
||||||
"text": " ort"
|
"text": " ort"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 35567,
|
"id": 35567,
|
||||||
"logprob": -7.578125,
|
"logprob": -7.5820312,
|
||||||
"text": "olan"
|
"text": "olan"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 15,
|
"id": 15,
|
||||||
"logprob": -1.4003906,
|
"logprob": -1.4013672,
|
||||||
"text": ","
|
"text": ","
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1669,
|
"id": 1669,
|
||||||
"logprob": -1.5439453,
|
"logprob": -1.5595703,
|
||||||
"text": " il"
|
"text": " il"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 11580,
|
"id": 11580,
|
||||||
"logprob": -0.93896484,
|
"logprob": -0.9428711,
|
||||||
"text": " faut"
|
"text": " faut"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3913,
|
"id": 3913,
|
||||||
"logprob": -3.7207031,
|
"logprob": -3.703125,
|
||||||
"text": " tout"
|
"text": " tout"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 39261,
|
"id": 39261,
|
||||||
"logprob": -1.5742188,
|
"logprob": -1.7763672,
|
||||||
"text": " d'abord"
|
"text": " d'abord"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -64,19 +64,19 @@
|
||||||
"tokens": [
|
"tokens": [
|
||||||
{
|
{
|
||||||
"id": 578,
|
"id": 578,
|
||||||
"logprob": -1.6474609,
|
"logprob": -1.7822266,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " le"
|
"text": " le"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 5608,
|
"id": 5608,
|
||||||
"logprob": -2.4707031,
|
"logprob": -2.4882812,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " faire"
|
"text": " faire"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 7735,
|
"id": 7735,
|
||||||
"logprob": -2.4355469,
|
"logprob": -2.4199219,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " fond"
|
"text": " fond"
|
||||||
},
|
},
|
||||||
|
@ -94,13 +94,13 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 366,
|
"id": 366,
|
||||||
"logprob": -1.1953125,
|
"logprob": -1.1308594,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " la"
|
"text": " la"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 48844,
|
"id": 48844,
|
||||||
"logprob": -1.7978516,
|
"logprob": -1.7900391,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " cass"
|
"text": " cass"
|
||||||
},
|
},
|
||||||
|
@ -118,7 +118,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2940,
|
"id": 2940,
|
||||||
"logprob": -1.9335938,
|
"logprob": -1.9306641,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " avec"
|
"text": " avec"
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3533,
|
"id": 3533,
|
||||||
"logprob": -9.5625,
|
"logprob": -9.625,
|
||||||
"text": "Test"
|
"text": "Test"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -24,13 +24,13 @@
|
||||||
"tokens": [
|
"tokens": [
|
||||||
{
|
{
|
||||||
"id": 2143,
|
"id": 2143,
|
||||||
"logprob": -1.8203125,
|
"logprob": -1.828125,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " sent"
|
"text": " sent"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 10081,
|
"id": 10081,
|
||||||
"logprob": -0.55078125,
|
"logprob": -0.41210938,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " successfully"
|
"text": " successfully"
|
||||||
},
|
},
|
||||||
|
@ -42,7 +42,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 100001,
|
"id": 100001,
|
||||||
"logprob": -0.12695312,
|
"logprob": -0.16015625,
|
||||||
"special": true,
|
"special": true,
|
||||||
"text": "<|end▁of▁sentence|>"
|
"text": "<|end▁of▁sentence|>"
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,13 +24,13 @@
|
||||||
"tokens": [
|
"tokens": [
|
||||||
{
|
{
|
||||||
"id": 1736,
|
"id": 1736,
|
||||||
"logprob": -2.046875,
|
"logprob": -2.03125,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " form"
|
"text": " form"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 109,
|
"id": 109,
|
||||||
"logprob": -1.8828125,
|
"logprob": -1.8671875,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": "\n\n"
|
"text": "\n\n"
|
||||||
},
|
},
|
||||||
|
@ -42,48 +42,48 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2121,
|
"id": 2121,
|
||||||
"logprob": -1.78125,
|
"logprob": -1.8125,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " test"
|
"text": " test"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3853,
|
"id": 3853,
|
||||||
"logprob": -0.23632812,
|
"logprob": -0.24121094,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " request"
|
"text": " request"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1736,
|
"id": 1736,
|
||||||
"logprob": -0.09326172,
|
"logprob": -0.100097656,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " form"
|
"text": " form"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 603,
|
"id": 603,
|
||||||
"logprob": -0.8828125,
|
"logprob": -0.9453125,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " is"
|
"text": " is"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1671,
|
"id": 476,
|
||||||
"logprob": -1.6171875,
|
"logprob": -1.703125,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " used"
|
"text": " a"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 577,
|
"id": 4551,
|
||||||
"logprob": -0.390625,
|
"logprob": -2.453125,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " to"
|
"text": " document"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3853,
|
"id": 674,
|
||||||
"logprob": -1.2265625,
|
"logprob": -0.796875,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " request"
|
"text": " that"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"top_tokens": null
|
"top_tokens": null
|
||||||
},
|
},
|
||||||
"generated_text": " form\n\nThe test request form is used to request"
|
"generated_text": " form\n\nThe test request form is a document that"
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,13 +8,13 @@
|
||||||
"tokens": [
|
"tokens": [
|
||||||
{
|
{
|
||||||
"id": 54901,
|
"id": 54901,
|
||||||
"logprob": -0.72753906,
|
"logprob": -0.84765625,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": "beach"
|
"text": "beach"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"logprob": -0.011009216,
|
"logprob": -0.008666992,
|
||||||
"special": true,
|
"special": true,
|
||||||
"text": "<eos>"
|
"text": "<eos>"
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,25 +19,25 @@
|
||||||
"tokens": [
|
"tokens": [
|
||||||
{
|
{
|
||||||
"id": 284,
|
"id": 284,
|
||||||
"logprob": -0.19421387,
|
"logprob": -0.28955078,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " to"
|
"text": " to"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3758,
|
"id": 3758,
|
||||||
"logprob": -0.62597656,
|
"logprob": -0.7739258,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " send"
|
"text": " send"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1366,
|
"id": 1366,
|
||||||
"logprob": -0.87060547,
|
"logprob": -0.85253906,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " data"
|
"text": " data"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 625,
|
"id": 625,
|
||||||
"logprob": -0.88427734,
|
"logprob": -0.8984375,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " over"
|
"text": " over"
|
||||||
},
|
},
|
||||||
|
@ -49,7 +49,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3127,
|
"id": 3127,
|
||||||
"logprob": -1.9462891,
|
"logprob": -1.9404297,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " network"
|
"text": " network"
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,22 +11,22 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 13,
|
"id": 13,
|
||||||
"logprob": -2.59375,
|
"logprob": -2.734375,
|
||||||
"text": ","
|
"text": ","
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 8862,
|
"id": 8862,
|
||||||
"logprob": -3.5625,
|
"logprob": -3.6875,
|
||||||
"text": " yellow"
|
"text": " yellow"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 13,
|
"id": 13,
|
||||||
"logprob": -0.44726562,
|
"logprob": -0.40234375,
|
||||||
"text": ","
|
"text": ","
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 209,
|
"id": 209,
|
||||||
"logprob": -8.0,
|
"logprob": -8.25,
|
||||||
"text": " "
|
"text": " "
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -52,7 +52,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 9830,
|
"id": 9830,
|
||||||
"logprob": -2.03125,
|
"logprob": -2.25,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " colors"
|
"text": " colors"
|
||||||
},
|
},
|
||||||
|
@ -64,13 +64,13 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 329,
|
"id": 329,
|
||||||
"logprob": -2.734375,
|
"logprob": -2.171875,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " A"
|
"text": " A"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 1180,
|
"id": 1180,
|
||||||
"logprob": -2.0,
|
"logprob": -2.046875,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " number"
|
"text": " number"
|
||||||
},
|
},
|
||||||
|
@ -81,19 +81,19 @@
|
||||||
"text": " of"
|
"text": " of"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 253,
|
"id": 1027,
|
||||||
"logprob": -0.69140625,
|
"logprob": -1.5546875,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " the"
|
"text": " different"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3295,
|
"id": 3295,
|
||||||
"logprob": -0.8203125,
|
"logprob": -0.97265625,
|
||||||
"special": false,
|
"special": false,
|
||||||
"text": " color"
|
"text": " color"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"top_tokens": null
|
"top_tokens": null
|
||||||
},
|
},
|
||||||
"generated_text": "blue, red, yellow, \nand blue colors. A number of the color"
|
"generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
|
||||||
assert response.details.generated_tokens == 10
|
assert response.details.generated_tokens == 10
|
||||||
assert (
|
assert (
|
||||||
response.generated_text
|
response.generated_text
|
||||||
== "blue, red, yellow, \nand blue colors. A number of the color"
|
== "blue, red, yellow, \nand blue colors. A number of different color"
|
||||||
)
|
)
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
@ -62,6 +62,7 @@ async def test_mamba_load(
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(responses) == 4
|
assert len(responses) == 4
|
||||||
|
assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
|
||||||
assert all([r.generated_text == responses[0].generated_text for r in responses])
|
assert all([r.generated_text == responses[0].generated_text for r in responses])
|
||||||
assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
|
assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,7 @@
|
||||||
from setuptools import setup
|
from setuptools import setup
|
||||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
||||||
import torch
|
|
||||||
|
|
||||||
extra_compile_args = ["-std=c++17"]
|
extra_compile_args = ["-std=c++17"]
|
||||||
if not torch.version.hip:
|
|
||||||
extra_compile_args.append("-arch=compute_80")
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="custom_kernels",
|
name="custom_kernels",
|
||||||
|
|
Loading…
Reference in New Issue