All integration tests back everywhere (too many failed CI). (#2428)
* All integration tests back everywhere (too many failed CI). * Upgrade integration tests after 12.4 * Attempt to remove the specified compute cap. * Common arch list. * Punica uses raw ASM which is not valid on 9.0 apparently.
This commit is contained in:
parent
53729b74ac
commit
e4201f44cf
|
@ -172,7 +172,7 @@ jobs:
|
|||
group: ${{ needs.build-and-push.outputs.runs_on }}
|
||||
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
||||
env:
|
||||
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '' }}
|
||||
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
|
|
@ -88,6 +88,7 @@ RUN case ${TARGETPLATFORM} in \
|
|||
FROM pytorch-install AS kernel-builder
|
||||
|
||||
ARG MAX_JOBS=8
|
||||
ENV TORCH_CUDA_ARCH_LIST="8.0;8.6;9.0+PTX"
|
||||
|
||||
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
|
||||
ninja-build cmake \
|
||||
|
@ -118,7 +119,7 @@ FROM kernel-builder AS exllama-kernels-builder
|
|||
WORKDIR /usr/src
|
||||
COPY server/exllama_kernels/ .
|
||||
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" python setup.py build
|
||||
RUN python setup.py build
|
||||
|
||||
# Build Transformers exllama kernels
|
||||
FROM kernel-builder AS exllamav2-kernels-builder
|
||||
|
@ -126,21 +127,21 @@ WORKDIR /usr/src
|
|||
COPY server/Makefile-exllamav2/ Makefile
|
||||
|
||||
# Build specific version of transformers
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-exllamav2
|
||||
RUN make build-exllamav2
|
||||
|
||||
# Build Transformers awq kernels
|
||||
FROM kernel-builder AS awq-kernels-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/Makefile-awq Makefile
|
||||
# Build specific version of transformers
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-awq
|
||||
RUN make build-awq
|
||||
|
||||
# Build eetq kernels
|
||||
FROM kernel-builder AS eetq-kernels-builder
|
||||
WORKDIR /usr/src
|
||||
COPY server/Makefile-eetq Makefile
|
||||
# Build specific version of transformers
|
||||
RUN TORCH_CUDA_ARCH_LIST="8.0;8.6+PTX" make build-eetq
|
||||
RUN make build-eetq
|
||||
|
||||
# Build Lorax Punica kernels
|
||||
FROM kernel-builder AS lorax-punica-builder
|
||||
|
|
|
@ -11,52 +11,52 @@
|
|||
},
|
||||
{
|
||||
"id": 49833,
|
||||
"logprob": -10.546875,
|
||||
"logprob": -10.5703125,
|
||||
"text": " dég"
|
||||
},
|
||||
{
|
||||
"id": 21543,
|
||||
"logprob": -0.18457031,
|
||||
"logprob": -0.14746094,
|
||||
"text": "uster"
|
||||
},
|
||||
{
|
||||
"id": 447,
|
||||
"logprob": -1.9287109,
|
||||
"logprob": -1.9277344,
|
||||
"text": " un"
|
||||
},
|
||||
{
|
||||
"id": 46341,
|
||||
"logprob": -15.4296875,
|
||||
"logprob": -15.421875,
|
||||
"text": " ort"
|
||||
},
|
||||
{
|
||||
"id": 35567,
|
||||
"logprob": -7.578125,
|
||||
"logprob": -7.5820312,
|
||||
"text": "olan"
|
||||
},
|
||||
{
|
||||
"id": 15,
|
||||
"logprob": -1.4003906,
|
||||
"logprob": -1.4013672,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 1669,
|
||||
"logprob": -1.5439453,
|
||||
"logprob": -1.5595703,
|
||||
"text": " il"
|
||||
},
|
||||
{
|
||||
"id": 11580,
|
||||
"logprob": -0.93896484,
|
||||
"logprob": -0.9428711,
|
||||
"text": " faut"
|
||||
},
|
||||
{
|
||||
"id": 3913,
|
||||
"logprob": -3.7207031,
|
||||
"logprob": -3.703125,
|
||||
"text": " tout"
|
||||
},
|
||||
{
|
||||
"id": 39261,
|
||||
"logprob": -1.5742188,
|
||||
"logprob": -1.7763672,
|
||||
"text": " d'abord"
|
||||
}
|
||||
],
|
||||
|
@ -64,19 +64,19 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 578,
|
||||
"logprob": -1.6474609,
|
||||
"logprob": -1.7822266,
|
||||
"special": false,
|
||||
"text": " le"
|
||||
},
|
||||
{
|
||||
"id": 5608,
|
||||
"logprob": -2.4707031,
|
||||
"logprob": -2.4882812,
|
||||
"special": false,
|
||||
"text": " faire"
|
||||
},
|
||||
{
|
||||
"id": 7735,
|
||||
"logprob": -2.4355469,
|
||||
"logprob": -2.4199219,
|
||||
"special": false,
|
||||
"text": " fond"
|
||||
},
|
||||
|
@ -94,13 +94,13 @@
|
|||
},
|
||||
{
|
||||
"id": 366,
|
||||
"logprob": -1.1953125,
|
||||
"logprob": -1.1308594,
|
||||
"special": false,
|
||||
"text": " la"
|
||||
},
|
||||
{
|
||||
"id": 48844,
|
||||
"logprob": -1.7978516,
|
||||
"logprob": -1.7900391,
|
||||
"special": false,
|
||||
"text": " cass"
|
||||
},
|
||||
|
@ -118,7 +118,7 @@
|
|||
},
|
||||
{
|
||||
"id": 2940,
|
||||
"logprob": -1.9335938,
|
||||
"logprob": -1.9306641,
|
||||
"special": false,
|
||||
"text": " avec"
|
||||
}
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
},
|
||||
{
|
||||
"id": 3533,
|
||||
"logprob": -9.5625,
|
||||
"logprob": -9.625,
|
||||
"text": "Test"
|
||||
},
|
||||
{
|
||||
|
@ -24,13 +24,13 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 2143,
|
||||
"logprob": -1.8203125,
|
||||
"logprob": -1.828125,
|
||||
"special": false,
|
||||
"text": " sent"
|
||||
},
|
||||
{
|
||||
"id": 10081,
|
||||
"logprob": -0.55078125,
|
||||
"logprob": -0.41210938,
|
||||
"special": false,
|
||||
"text": " successfully"
|
||||
},
|
||||
|
@ -42,7 +42,7 @@
|
|||
},
|
||||
{
|
||||
"id": 100001,
|
||||
"logprob": -0.12695312,
|
||||
"logprob": -0.16015625,
|
||||
"special": true,
|
||||
"text": "<|end▁of▁sentence|>"
|
||||
}
|
||||
|
|
|
@ -24,13 +24,13 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 1736,
|
||||
"logprob": -2.046875,
|
||||
"logprob": -2.03125,
|
||||
"special": false,
|
||||
"text": " form"
|
||||
},
|
||||
{
|
||||
"id": 109,
|
||||
"logprob": -1.8828125,
|
||||
"logprob": -1.8671875,
|
||||
"special": false,
|
||||
"text": "\n\n"
|
||||
},
|
||||
|
@ -42,48 +42,48 @@
|
|||
},
|
||||
{
|
||||
"id": 2121,
|
||||
"logprob": -1.78125,
|
||||
"logprob": -1.8125,
|
||||
"special": false,
|
||||
"text": " test"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -0.23632812,
|
||||
"logprob": -0.24121094,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
},
|
||||
{
|
||||
"id": 1736,
|
||||
"logprob": -0.09326172,
|
||||
"logprob": -0.100097656,
|
||||
"special": false,
|
||||
"text": " form"
|
||||
},
|
||||
{
|
||||
"id": 603,
|
||||
"logprob": -0.8828125,
|
||||
"logprob": -0.9453125,
|
||||
"special": false,
|
||||
"text": " is"
|
||||
},
|
||||
{
|
||||
"id": 1671,
|
||||
"logprob": -1.6171875,
|
||||
"id": 476,
|
||||
"logprob": -1.703125,
|
||||
"special": false,
|
||||
"text": " used"
|
||||
"text": " a"
|
||||
},
|
||||
{
|
||||
"id": 577,
|
||||
"logprob": -0.390625,
|
||||
"id": 4551,
|
||||
"logprob": -2.453125,
|
||||
"special": false,
|
||||
"text": " to"
|
||||
"text": " document"
|
||||
},
|
||||
{
|
||||
"id": 3853,
|
||||
"logprob": -1.2265625,
|
||||
"id": 674,
|
||||
"logprob": -0.796875,
|
||||
"special": false,
|
||||
"text": " request"
|
||||
"text": " that"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": " form\n\nThe test request form is used to request"
|
||||
"generated_text": " form\n\nThe test request form is a document that"
|
||||
}
|
||||
|
|
|
@ -8,13 +8,13 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 54901,
|
||||
"logprob": -0.72753906,
|
||||
"logprob": -0.84765625,
|
||||
"special": false,
|
||||
"text": "beach"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"logprob": -0.011009216,
|
||||
"logprob": -0.008666992,
|
||||
"special": true,
|
||||
"text": "<eos>"
|
||||
}
|
||||
|
|
|
@ -19,25 +19,25 @@
|
|||
"tokens": [
|
||||
{
|
||||
"id": 284,
|
||||
"logprob": -0.19421387,
|
||||
"logprob": -0.28955078,
|
||||
"special": false,
|
||||
"text": " to"
|
||||
},
|
||||
{
|
||||
"id": 3758,
|
||||
"logprob": -0.62597656,
|
||||
"logprob": -0.7739258,
|
||||
"special": false,
|
||||
"text": " send"
|
||||
},
|
||||
{
|
||||
"id": 1366,
|
||||
"logprob": -0.87060547,
|
||||
"logprob": -0.85253906,
|
||||
"special": false,
|
||||
"text": " data"
|
||||
},
|
||||
{
|
||||
"id": 625,
|
||||
"logprob": -0.88427734,
|
||||
"logprob": -0.8984375,
|
||||
"special": false,
|
||||
"text": " over"
|
||||
},
|
||||
|
@ -49,7 +49,7 @@
|
|||
},
|
||||
{
|
||||
"id": 3127,
|
||||
"logprob": -1.9462891,
|
||||
"logprob": -1.9404297,
|
||||
"special": false,
|
||||
"text": " network"
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,22 +11,22 @@
|
|||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -2.59375,
|
||||
"logprob": -2.734375,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 8862,
|
||||
"logprob": -3.5625,
|
||||
"logprob": -3.6875,
|
||||
"text": " yellow"
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"logprob": -0.44726562,
|
||||
"logprob": -0.40234375,
|
||||
"text": ","
|
||||
},
|
||||
{
|
||||
"id": 209,
|
||||
"logprob": -8.0,
|
||||
"logprob": -8.25,
|
||||
"text": " "
|
||||
}
|
||||
],
|
||||
|
@ -52,7 +52,7 @@
|
|||
},
|
||||
{
|
||||
"id": 9830,
|
||||
"logprob": -2.03125,
|
||||
"logprob": -2.25,
|
||||
"special": false,
|
||||
"text": " colors"
|
||||
},
|
||||
|
@ -64,13 +64,13 @@
|
|||
},
|
||||
{
|
||||
"id": 329,
|
||||
"logprob": -2.734375,
|
||||
"logprob": -2.171875,
|
||||
"special": false,
|
||||
"text": " A"
|
||||
},
|
||||
{
|
||||
"id": 1180,
|
||||
"logprob": -2.0,
|
||||
"logprob": -2.046875,
|
||||
"special": false,
|
||||
"text": " number"
|
||||
},
|
||||
|
@ -81,19 +81,19 @@
|
|||
"text": " of"
|
||||
},
|
||||
{
|
||||
"id": 253,
|
||||
"logprob": -0.69140625,
|
||||
"id": 1027,
|
||||
"logprob": -1.5546875,
|
||||
"special": false,
|
||||
"text": " the"
|
||||
"text": " different"
|
||||
},
|
||||
{
|
||||
"id": 3295,
|
||||
"logprob": -0.8203125,
|
||||
"logprob": -0.97265625,
|
||||
"special": false,
|
||||
"text": " color"
|
||||
}
|
||||
],
|
||||
"top_tokens": null
|
||||
},
|
||||
"generated_text": "blue, red, yellow, \nand blue colors. A number of the color"
|
||||
"generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
|
||||
}
|
||||
|
|
|
@ -47,7 +47,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
|
|||
assert response.details.generated_tokens == 10
|
||||
assert (
|
||||
response.generated_text
|
||||
== "blue, red, yellow, \nand blue colors. A number of the color"
|
||||
== "blue, red, yellow, \nand blue colors. A number of different color"
|
||||
)
|
||||
assert response == response_snapshot
|
||||
|
||||
|
@ -62,6 +62,7 @@ async def test_mamba_load(
|
|||
)
|
||||
|
||||
assert len(responses) == 4
|
||||
assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
|
||||
assert all([r.generated_text == responses[0].generated_text for r in responses])
|
||||
assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
|
||||
|
||||
|
|
|
@ -1,10 +1,7 @@
|
|||
from setuptools import setup
|
||||
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
|
||||
import torch
|
||||
|
||||
extra_compile_args = ["-std=c++17"]
|
||||
if not torch.version.hip:
|
||||
extra_compile_args.append("-arch=compute_80")
|
||||
|
||||
setup(
|
||||
name="custom_kernels",
|
||||
|
|
Loading…
Reference in New Issue