All integration tests back everywhere (too many failed CI). (#2428)

* All integration tests back everywhere (too many failed CI).

* Upgrade integration tests after 12.4

* Attempt to remove the specified compute cap.

* Common arch list.

* Punica uses raw ASM which is not valid on 9.0 apparently.
This commit is contained in:
Nicolas Patry 2024-08-16 21:19:46 +02:00 committed by GitHub
parent 53729b74ac
commit e4201f44cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 32936 additions and 32937 deletions

View File

@ -172,7 +172,7 @@ jobs:
group: ${{ needs.build-and-push.outputs.runs_on }}
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
env:
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '' }}
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4

View File

@ -88,6 +88,7 @@ RUN case ${TARGETPLATFORM} in \
FROM pytorch-install AS kernel-builder
ARG MAX_JOBS=8
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0+PTX"
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
ninja-build cmake \
@ -118,7 +119,7 @@ FROM kernel-builder AS exllama-kernels-builder
WORKDIR /usr/src
COPY server/exllama_kernels/ .
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
RUN python setup.py build
# Build Transformers exllama kernels
FROM kernel-builder AS exllamav2-kernels-builder
@ -126,21 +127,21 @@ WORKDIR /usr/src
COPY server/Makefile-exllamav2/ Makefile
# Build specific version of transformers
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-exllamav2
RUN make build-exllamav2
# Build Transformers awq kernels
FROM kernel-builder AS awq-kernels-builder
WORKDIR /usr/src
COPY server/Makefile-awq Makefile
# Build specific version of transformers
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
RUN make build-awq
# Build eetq kernels
FROM kernel-builder AS eetq-kernels-builder
WORKDIR /usr/src
COPY server/Makefile-eetq Makefile
# Build specific version of transformers
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
RUN make build-eetq
# Build Lorax Punica kernels
FROM kernel-builder AS lorax-punica-builder

View File

@ -11,52 +11,52 @@
},
{
"id": 49833,
"logprob": -10.546875,
"logprob": -10.5703125,
"text": " dég"
},
{
"id": 21543,
"logprob": -0.18457031,
"logprob": -0.14746094,
"text": "uster"
},
{
"id": 447,
"logprob": -1.9287109,
"logprob": -1.9277344,
"text": " un"
},
{
"id": 46341,
"logprob": -15.4296875,
"logprob": -15.421875,
"text": " ort"
},
{
"id": 35567,
"logprob": -7.578125,
"logprob": -7.5820312,
"text": "olan"
},
{
"id": 15,
"logprob": -1.4003906,
"logprob": -1.4013672,
"text": ","
},
{
"id": 1669,
"logprob": -1.5439453,
"logprob": -1.5595703,
"text": " il"
},
{
"id": 11580,
"logprob": -0.93896484,
"logprob": -0.9428711,
"text": " faut"
},
{
"id": 3913,
"logprob": -3.7207031,
"logprob": -3.703125,
"text": " tout"
},
{
"id": 39261,
"logprob": -1.5742188,
"logprob": -1.7763672,
"text": " d'abord"
}
],
@ -64,19 +64,19 @@
"tokens": [
{
"id": 578,
"logprob": -1.6474609,
"logprob": -1.7822266,
"special": false,
"text": " le"
},
{
"id": 5608,
"logprob": -2.4707031,
"logprob": -2.4882812,
"special": false,
"text": " faire"
},
{
"id": 7735,
"logprob": -2.4355469,
"logprob": -2.4199219,
"special": false,
"text": " fond"
},
@ -94,13 +94,13 @@
},
{
"id": 366,
"logprob": -1.1953125,
"logprob": -1.1308594,
"special": false,
"text": " la"
},
{
"id": 48844,
"logprob": -1.7978516,
"logprob": -1.7900391,
"special": false,
"text": " cass"
},
@ -118,7 +118,7 @@
},
{
"id": 2940,
"logprob": -1.9335938,
"logprob": -1.9306641,
"special": false,
"text": " avec"
}

View File

@ -11,7 +11,7 @@
},
{
"id": 3533,
"logprob": -9.5625,
"logprob": -9.625,
"text": "Test"
},
{
@ -24,13 +24,13 @@
"tokens": [
{
"id": 2143,
"logprob": -1.8203125,
"logprob": -1.828125,
"special": false,
"text": " sent"
},
{
"id": 10081,
"logprob": -0.55078125,
"logprob": -0.41210938,
"special": false,
"text": " successfully"
},
@ -42,7 +42,7 @@
},
{
"id": 100001,
"logprob": -0.12695312,
"logprob": -0.16015625,
"special": true,
"text": "<end▁of▁sentence>"
}

View File

@ -24,13 +24,13 @@
"tokens": [
{
"id": 1736,
"logprob": -2.046875,
"logprob": -2.03125,
"special": false,
"text": " form"
},
{
"id": 109,
"logprob": -1.8828125,
"logprob": -1.8671875,
"special": false,
"text": "\n\n"
},
@ -42,48 +42,48 @@
},
{
"id": 2121,
"logprob": -1.78125,
"logprob": -1.8125,
"special": false,
"text": " test"
},
{
"id": 3853,
"logprob": -0.23632812,
"logprob": -0.24121094,
"special": false,
"text": " request"
},
{
"id": 1736,
"logprob": -0.09326172,
"logprob": -0.100097656,
"special": false,
"text": " form"
},
{
"id": 603,
"logprob": -0.8828125,
"logprob": -0.9453125,
"special": false,
"text": " is"
},
{
"id": 1671,
"logprob": -1.6171875,
"id": 476,
"logprob": -1.703125,
"special": false,
"text": " used"
"text": " a"
},
{
"id": 577,
"logprob": -0.390625,
"id": 4551,
"logprob": -2.453125,
"special": false,
"text": " to"
"text": " document"
},
{
"id": 3853,
"logprob": -1.2265625,
"id": 674,
"logprob": -0.796875,
"special": false,
"text": " request"
"text": " that"
}
],
"top_tokens": null
},
"generated_text": " form\n\nThe test request form is used to request"
"generated_text": " form\n\nThe test request form is a document that"
}

View File

@ -8,13 +8,13 @@
"tokens": [
{
"id": 54901,
"logprob": -0.72753906,
"logprob": -0.84765625,
"special": false,
"text": "beach"
},
{
"id": 1,
"logprob": -0.011009216,
"logprob": -0.008666992,
"special": true,
"text": "<eos>"
}

View File

@ -19,25 +19,25 @@
"tokens": [
{
"id": 284,
"logprob": -0.19421387,
"logprob": -0.28955078,
"special": false,
"text": " to"
},
{
"id": 3758,
"logprob": -0.62597656,
"logprob": -0.7739258,
"special": false,
"text": " send"
},
{
"id": 1366,
"logprob": -0.87060547,
"logprob": -0.85253906,
"special": false,
"text": " data"
},
{
"id": 625,
"logprob": -0.88427734,
"logprob": -0.8984375,
"special": false,
"text": " over"
},
@ -49,7 +49,7 @@
},
{
"id": 3127,
"logprob": -1.9462891,
"logprob": -1.9404297,
"special": false,
"text": " network"
}

View File

@ -11,22 +11,22 @@
},
{
"id": 13,
"logprob": -2.59375,
"logprob": -2.734375,
"text": ","
},
{
"id": 8862,
"logprob": -3.5625,
"logprob": -3.6875,
"text": " yellow"
},
{
"id": 13,
"logprob": -0.44726562,
"logprob": -0.40234375,
"text": ","
},
{
"id": 209,
"logprob": -8.0,
"logprob": -8.25,
"text": " "
}
],
@ -52,7 +52,7 @@
},
{
"id": 9830,
"logprob": -2.03125,
"logprob": -2.25,
"special": false,
"text": " colors"
},
@ -64,13 +64,13 @@
},
{
"id": 329,
"logprob": -2.734375,
"logprob": -2.171875,
"special": false,
"text": " A"
},
{
"id": 1180,
"logprob": -2.0,
"logprob": -2.046875,
"special": false,
"text": " number"
},
@ -81,19 +81,19 @@
"text": " of"
},
{
"id": 253,
"logprob": -0.69140625,
"id": 1027,
"logprob": -1.5546875,
"special": false,
"text": " the"
"text": " different"
},
{
"id": 3295,
"logprob": -0.8203125,
"logprob": -0.97265625,
"special": false,
"text": " color"
}
],
"top_tokens": null
},
"generated_text": "blue, red, yellow, \nand blue colors. A number of the color"
"generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
}

View File

@ -47,7 +47,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
assert response.details.generated_tokens == 10
assert (
response.generated_text
== "blue, red, yellow, \nand blue colors. A number of the color"
== "blue, red, yellow, \nand blue colors. A number of different color"
)
assert response == response_snapshot
@ -62,6 +62,7 @@ async def test_mamba_load(
)
assert len(responses) == 4
assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
assert all([r.generated_text == responses[0].generated_text for r in responses])
assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"

View File

@ -1,10 +1,7 @@
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
import torch
extra_compile_args = ["-std=c++17"]
if not torch.version.hip:
extra_compile_args.append("-arch=compute_80")
setup(
name="custom_kernels",