All integration tests back everywhere (too many CI failures). (#2428)

* All integration tests back everywhere (too many CI failures).

* Update integration test snapshots after 12.4.

* Attempt to remove the specified compute cap from the custom kernels build.

* Use a common TORCH_CUDA_ARCH_LIST for all kernel builders.

* Punica uses raw ASM which is apparently not valid on 9.0.
Nicolas Patry 2024-08-16 21:19:46 +02:00 committed by GitHub
parent 53729b74ac
commit e4201f44cf
11 changed files with 32936 additions and 32937 deletions

View File

@@ -172,7 +172,7 @@ jobs:
     group: ${{ needs.build-and-push.outputs.runs_on }}
     if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
     env:
-      PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '' }}
+      PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4

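The workflow hunk above changes only the fallback branch of the PYTEST_FLAGS expression: it used to be '' (so --release was passed only on tags, on main, or when release-tests was set), and now both branches yield '--release', so the release integration tests run on every build. A rough Python analogue of that GitHub Actions `cond && a || b` ternary, purely for illustration (these functions are not part of the workflow):

def pytest_flags_before(release_condition: bool) -> str:
    # Old expression: release tests only on tags, main, or release-tests == true.
    return "--release" if release_condition else ""

def pytest_flags_after(release_condition: bool) -> str:
    # New expression: both branches return "--release", so the flag is always
    # set; the argument is deliberately ignored.
    return "--release"

assert pytest_flags_before(False) == ""
assert pytest_flags_after(False) == "--release"
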
View File

@@ -88,6 +88,7 @@ RUN case ${TARGETPLATFORM} in \
 FROM pytorch-install AS kernel-builder
 ARG MAX_JOBS=8
+ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0+PTX"
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
     ninja-build cmake \
@@ -118,7 +119,7 @@ FROM kernel-builder AS exllama-kernels-builder
 WORKDIR /usr/src
 COPY server/exllama_kernels/ .
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
+RUN python setup.py build
 # Build Transformers exllama kernels
 FROM kernel-builder AS exllamav2-kernels-builder
@@ -126,21 +127,21 @@ WORKDIR /usr/src
 COPY server/Makefile-exllamav2/ Makefile
 # Build specific version of transformers
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-exllamav2
+RUN make build-exllamav2
 # Build Transformers awq kernels
 FROM kernel-builder AS awq-kernels-builder
 WORKDIR /usr/src
 COPY server/Makefile-awq Makefile
 # Build specific version of transformers
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
+RUN make build-awq
 # Build eetq kernels
 FROM kernel-builder AS eetq-kernels-builder
 WORKDIR /usr/src
 COPY server/Makefile-eetq Makefile
 # Build specific version of transformers
-RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
+RUN make build-eetq
 # Build Lorax Punica kernels
 FROM kernel-builder AS lorax-punica-builder

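The Dockerfile hunks above set TORCH_CUDA_ARCH_LIST once as an ENV in the kernel-builder stage and drop the per-RUN prefixes, so every kernel built in that stage (exllama, exllamav2, awq, eetq, and so on) targets the same architecture list, now including 9.0. PyTorch's extension builder reads this variable and turns it into nvcc -gencode flags; the snippet below is a self-contained sketch of that mapping, not torch's actual implementation.

def arch_list_to_gencode(arch_list: str) -> list[str]:
    # Sketch only: torch.utils.cpp_extension performs an equivalent expansion
    # when TORCH_CUDA_ARCH_LIST is set and no explicit arch flags are passed.
    flags = []
    for arch in arch_list.split(";"):
        emit_ptx = arch.endswith("+PTX")
        num = arch.removesuffix("+PTX").replace(".", "")
        # SASS for the concrete architecture...
        flags.append(f"-gencode=arch=compute_{num},code=sm_{num}")
        if emit_ptx:
            # ...plus forward-compatible PTX when the entry carries +PTX.
            flags.append(f"-gencode=arch=compute_{num},code=compute_{num}")
    return flags

print(arch_list_to_gencode("8.0;8.6;9.0+PTX"))
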
View File

@@ -11,52 +11,52 @@
 },
 {
   "id": 49833,
-  "logprob": -10.546875,
+  "logprob": -10.5703125,
   "text": " dég"
 },
 {
   "id": 21543,
-  "logprob": -0.18457031,
+  "logprob": -0.14746094,
   "text": "uster"
 },
 {
   "id": 447,
-  "logprob": -1.9287109,
+  "logprob": -1.9277344,
   "text": " un"
 },
 {
   "id": 46341,
-  "logprob": -15.4296875,
+  "logprob": -15.421875,
   "text": " ort"
 },
 {
   "id": 35567,
-  "logprob": -7.578125,
+  "logprob": -7.5820312,
   "text": "olan"
 },
 {
   "id": 15,
-  "logprob": -1.4003906,
+  "logprob": -1.4013672,
   "text": ","
 },
 {
   "id": 1669,
-  "logprob": -1.5439453,
+  "logprob": -1.5595703,
   "text": " il"
 },
 {
   "id": 11580,
-  "logprob": -0.93896484,
+  "logprob": -0.9428711,
   "text": " faut"
 },
 {
   "id": 3913,
-  "logprob": -3.7207031,
+  "logprob": -3.703125,
   "text": " tout"
 },
 {
   "id": 39261,
-  "logprob": -1.5742188,
+  "logprob": -1.7763672,
   "text": " d'abord"
 }
 ],
@@ -64,19 +64,19 @@
 "tokens": [
 {
   "id": 578,
-  "logprob": -1.6474609,
+  "logprob": -1.7822266,
   "special": false,
   "text": " le"
 },
 {
   "id": 5608,
-  "logprob": -2.4707031,
+  "logprob": -2.4882812,
   "special": false,
   "text": " faire"
 },
 {
   "id": 7735,
-  "logprob": -2.4355469,
+  "logprob": -2.4199219,
   "special": false,
   "text": " fond"
 },
@@ -94,13 +94,13 @@
 },
 {
   "id": 366,
-  "logprob": -1.1953125,
+  "logprob": -1.1308594,
   "special": false,
   "text": " la"
 },
 {
   "id": 48844,
-  "logprob": -1.7978516,
+  "logprob": -1.7900391,
   "special": false,
   "text": " cass"
 },
@@ -118,7 +118,7 @@
 },
 {
   "id": 2940,
-  "logprob": -1.9335938,
+  "logprob": -1.9306641,
   "special": false,
   "text": " avec"
 }

View File

@@ -11,7 +11,7 @@
 },
 {
   "id": 3533,
-  "logprob": -9.5625,
+  "logprob": -9.625,
   "text": "Test"
 },
 {
@@ -24,13 +24,13 @@
 "tokens": [
 {
   "id": 2143,
-  "logprob": -1.8203125,
+  "logprob": -1.828125,
   "special": false,
   "text": " sent"
 },
 {
   "id": 10081,
-  "logprob": -0.55078125,
+  "logprob": -0.41210938,
   "special": false,
   "text": " successfully"
 },
@@ -42,7 +42,7 @@
 },
 {
   "id": 100001,
-  "logprob": -0.12695312,
+  "logprob": -0.16015625,
   "special": true,
   "text": "<end▁of▁sentence>"
 }

View File

@@ -24,13 +24,13 @@
 "tokens": [
 {
   "id": 1736,
-  "logprob": -2.046875,
+  "logprob": -2.03125,
   "special": false,
   "text": " form"
 },
 {
   "id": 109,
-  "logprob": -1.8828125,
+  "logprob": -1.8671875,
   "special": false,
   "text": "\n\n"
 },
@@ -42,48 +42,48 @@
 },
 {
   "id": 2121,
-  "logprob": -1.78125,
+  "logprob": -1.8125,
   "special": false,
   "text": " test"
 },
 {
   "id": 3853,
-  "logprob": -0.23632812,
+  "logprob": -0.24121094,
   "special": false,
   "text": " request"
 },
 {
   "id": 1736,
-  "logprob": -0.09326172,
+  "logprob": -0.100097656,
   "special": false,
   "text": " form"
 },
 {
   "id": 603,
-  "logprob": -0.8828125,
+  "logprob": -0.9453125,
   "special": false,
   "text": " is"
 },
 {
-  "id": 1671,
-  "logprob": -1.6171875,
+  "id": 476,
+  "logprob": -1.703125,
   "special": false,
-  "text": " used"
+  "text": " a"
 },
 {
-  "id": 577,
-  "logprob": -0.390625,
+  "id": 4551,
+  "logprob": -2.453125,
   "special": false,
-  "text": " to"
+  "text": " document"
 },
 {
-  "id": 3853,
-  "logprob": -1.2265625,
+  "id": 674,
+  "logprob": -0.796875,
   "special": false,
-  "text": " request"
+  "text": " that"
 }
 ],
 "top_tokens": null
 },
-"generated_text": " form\n\nThe test request form is used to request"
+"generated_text": " form\n\nThe test request form is a document that"
 }

View File

@@ -8,13 +8,13 @@
 "tokens": [
 {
   "id": 54901,
-  "logprob": -0.72753906,
+  "logprob": -0.84765625,
   "special": false,
   "text": "beach"
 },
 {
   "id": 1,
-  "logprob": -0.011009216,
+  "logprob": -0.008666992,
   "special": true,
   "text": "<eos>"
 }

View File

@@ -19,25 +19,25 @@
 "tokens": [
 {
   "id": 284,
-  "logprob": -0.19421387,
+  "logprob": -0.28955078,
   "special": false,
   "text": " to"
 },
 {
   "id": 3758,
-  "logprob": -0.62597656,
+  "logprob": -0.7739258,
   "special": false,
   "text": " send"
 },
 {
   "id": 1366,
-  "logprob": -0.87060547,
+  "logprob": -0.85253906,
   "special": false,
   "text": " data"
 },
 {
   "id": 625,
-  "logprob": -0.88427734,
+  "logprob": -0.8984375,
   "special": false,
   "text": " over"
 },
@@ -49,7 +49,7 @@
 },
 {
   "id": 3127,
-  "logprob": -1.9462891,
+  "logprob": -1.9404297,
   "special": false,
   "text": " network"
 }

View File

@@ -11,22 +11,22 @@
 },
 {
   "id": 13,
-  "logprob": -2.59375,
+  "logprob": -2.734375,
   "text": ","
 },
 {
   "id": 8862,
-  "logprob": -3.5625,
+  "logprob": -3.6875,
   "text": " yellow"
 },
 {
   "id": 13,
-  "logprob": -0.44726562,
+  "logprob": -0.40234375,
   "text": ","
 },
 {
   "id": 209,
-  "logprob": -8.0,
+  "logprob": -8.25,
   "text": " "
 }
 ],
@@ -52,7 +52,7 @@
 },
 {
   "id": 9830,
-  "logprob": -2.03125,
+  "logprob": -2.25,
   "special": false,
   "text": " colors"
 },
@@ -64,13 +64,13 @@
 },
 {
   "id": 329,
-  "logprob": -2.734375,
+  "logprob": -2.171875,
   "special": false,
   "text": " A"
 },
 {
   "id": 1180,
-  "logprob": -2.0,
+  "logprob": -2.046875,
   "special": false,
   "text": " number"
 },
@@ -81,19 +81,19 @@
   "text": " of"
 },
 {
-  "id": 253,
-  "logprob": -0.69140625,
+  "id": 1027,
+  "logprob": -1.5546875,
   "special": false,
-  "text": " the"
+  "text": " different"
 },
 {
   "id": 3295,
-  "logprob": -0.8203125,
+  "logprob": -0.97265625,
   "special": false,
   "text": " color"
 }
 ],
 "top_tokens": null
 },
-"generated_text": "blue, red, yellow, \nand blue colors. A number of the color"
+"generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
 }

View File

@@ -47,7 +47,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
     assert response.details.generated_tokens == 10
     assert (
         response.generated_text
-        == "blue, red, yellow, \nand blue colors. A number of the color"
+        == "blue, red, yellow, \nand blue colors. A number of different color"
     )
     assert response == response_snapshot
@@ -62,6 +62,7 @@ async def test_mamba_load(
     )
     assert len(responses) == 4
+    assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
     assert all([r.generated_text == responses[0].generated_text for r in responses])
     assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"

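The added assertion in test_mamba_load checks the first response's text before the all() comparison, presumably so a bad generation fails with a readable string diff rather than a bare False from the list comprehension. A minimal sketch of that assertion ordering (illustrative helper, not the repository's test code):

def check_load_responses(responses, expected_text):
    # Illustrative only: mirrors the pattern used in the test above.
    assert len(responses) == 4
    # String comparison first: pytest shows expected vs. actual text on failure.
    assert responses[0].generated_text == expected_text
    # Then the coarser check that all concurrent responses are identical.
    assert all(r.generated_text == responses[0].generated_text for r in responses)
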
View File

@@ -1,10 +1,7 @@
 from setuptools import setup
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension
-import torch
 extra_compile_args = ["-std=c++17"]
-if not torch.version.hip:
-    extra_compile_args.append("-arch=compute_80")
 setup(
     name="custom_kernels",
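After this hunk, the custom_kernels setup.py no longer imports torch just to append a hard-coded -arch=compute_80 on CUDA builds; architecture selection falls back to the TORCH_CUDA_ARCH_LIST set in the kernel-builder stage. A hedged sketch of the trimmed file follows; the ext_modules part is outside the hunk, so the extension name and source path below are placeholders.

from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

extra_compile_args = ["-std=c++17"]

setup(
    name="custom_kernels",
    ext_modules=[
        CUDAExtension(
            name="custom_kernels.example_cuda",  # placeholder module name
            sources=["custom_kernels/example.cu"],  # placeholder source file
            extra_compile_args=extra_compile_args,
        )
    ],
    cmdclass={"build_ext": BuildExtension},
)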