GPTQ CI improvements (#2151)

* Add more representative Llama GPTQ test

The Llama GPTQ test is updated to use a model with the commonly used
quantizer config format and activation sorting. The old test is
kept around (but renamed) since it tests the format produced by
`text-generation-server quantize`.

* Add support for manually triggering a release build
This commit is contained in:
Daniël de Kok 2024-07-05 14:12:16 +02:00 committed by GitHub
parent b67d46336e
commit 67ef0649cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 805 additions and 283 deletions

View File

@ -11,6 +11,11 @@ on:
# - rocm
# - intel
required: true
release-tests:
description: "Run release integration tests"
required: true
default: false
type: boolean
jobs:
build-and-push:
@ -148,7 +153,7 @@ jobs:
runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"]
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
env:
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main') && '--release' || '' }}
# `release-tests` is declared as a boolean input, so compare it to the boolean
# `true`: `== 'true'` (string) is coerced to a numeric comparison and never matches.
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4

View File

@ -20,7 +20,14 @@ on:
- "Dockerfile_amd"
- "Dockerfile_intel"
branches:
- 'main'
- "main"
workflow_dispatch:
inputs:
release-tests:
description: "Run release integration tests"
required: true
default: false
type: boolean
jobs:
build:
@ -33,4 +40,6 @@ jobs:
uses: ./.github/workflows/build.yaml # calls the one above ^
with:
hardware: ${{ matrix.hardware }}
# https://github.com/actions/runner/issues/2206
release-tests: ${{ inputs.release-tests == true }}
secrets: inherit

View File

@ -5,85 +5,80 @@
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"id": 2323,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.7890625,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.625,
"id": 1715,
"logprob": -11.34375,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3359375,
"id": 198,
"logprob": -2.5742188,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8779297,
"id": 262,
"logprob": -1.6230469,
"special": false,
"text": "Test"
"text": " "
},
{
"id": 2009,
"logprob": -1.2744141,
"id": 3270,
"logprob": -2.046875,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1425781,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.9238281,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.6933594,
"id": 13204,
"logprob": -0.076660156,
"special": false,
"text": "\n"
"text": ".method"
},
{
"id": 3057,
"logprob": -1.4648438,
"id": 624,
"logprob": -0.021987915,
"special": false,
"text": "Test"
"text": " =="
},
{
"id": 2009,
"logprob": -0.15600586,
"id": 364,
"logprob": -0.39208984,
"special": false,
"text": " request"
"text": " '"
},
{
"id": 13,
"logprob": -0.8027344,
"id": 3019,
"logprob": -0.10821533,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.23022461,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0069885254,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.02218628,
"special": false,
"text": "\n"
"text": "POST"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
"generated_text": "\n \"\"\"\n if request.method == 'POST"
}

View File

@ -5,85 +5,80 @@
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"id": 2323,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6015625,
"id": 1715,
"logprob": -11.34375,
"text": " request"
}
],
"seed": 0,
"tokens": [
{
"id": 29899,
"logprob": -1.5625,
"id": 13,
"logprob": -2.2539062,
"special": false,
"text": "-"
"text": "."
},
{
"id": 1454,
"logprob": -0.20410156,
"id": 578,
"logprob": -0.15563965,
"special": false,
"text": "for"
"text": " The"
},
{
"id": 29899,
"id": 3622,
"logprob": -0.8203125,
"special": false,
"text": " server"
},
{
"id": 706,
"logprob": 0.0,
"special": false,
"text": "-"
"text": " has"
},
{
"id": 9342,
"id": 539,
"logprob": 0.0,
"special": false,
"text": "comment"
"text": " not"
},
{
"id": 29901,
"id": 3686,
"logprob": 0.0,
"special": false,
"text": ":"
"text": " yet"
},
{
"id": 396,
"logprob": -0.27685547,
"special": false,
"text": " #"
},
{
"id": 29906,
"logprob": -0.4970703,
"special": false,
"text": "2"
},
{
"id": 29900,
"logprob": -0.80615234,
"special": false,
"text": "0"
},
{
"id": 29896,
"id": 3288,
"logprob": 0.0,
"special": false,
"text": "1"
"text": " sent"
},
{
"id": 29955,
"logprob": -1.0751953,
"id": 904,
"logprob": 0.0,
"special": false,
"text": "7"
"text": " any"
},
{
"id": 828,
"logprob": 0.0,
"special": false,
"text": " data"
},
{
"id": 382,
"logprob": -1.5517578,
"special": false,
"text": ".\n\n"
}
],
"top_tokens": null
},
"generated_text": "Test request-for-comment: #2017"
"generated_text": "Test request. The server has not yet sent any data.\n\n"
}

View File

@ -6,87 +6,82 @@
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"id": 2323,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.828125,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.609375,
"id": 1715,
"logprob": -11.34375,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3300781,
"id": 198,
"logprob": -2.5742188,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8740234,
"id": 262,
"logprob": -1.6220703,
"special": false,
"text": "Test"
"text": " "
},
{
"id": 2009,
"logprob": -1.2646484,
"id": 3270,
"logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7158203,
"id": 13204,
"logprob": -0.07672119,
"special": false,
"text": "\n"
"text": ".method"
},
{
"id": 3057,
"logprob": -1.4667969,
"id": 624,
"logprob": -0.021987915,
"special": false,
"text": "Test"
"text": " =="
},
{
"id": 2009,
"logprob": -0.15344238,
"id": 364,
"logprob": -0.39208984,
"special": false,
"text": " request"
"text": " '"
},
{
"id": 13,
"logprob": -0.81591797,
"id": 3019,
"logprob": -0.10638428,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22973633,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007045746,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021957397,
"special": false,
"text": "\n"
"text": "POST"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
"generated_text": "\n \"\"\"\n if request.method == 'POST"
},
{
"details": {
@ -95,87 +90,82 @@
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"id": 2323,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.59375,
"id": 1715,
"logprob": -11.34375,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3378906,
"id": 198,
"logprob": -2.5742188,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8779297,
"id": 262,
"logprob": -1.6220703,
"special": false,
"text": "Test"
"text": " "
},
{
"id": 2009,
"logprob": -1.2636719,
"id": 3270,
"logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.6992188,
"id": 13204,
"logprob": -0.07672119,
"special": false,
"text": "\n"
"text": ".method"
},
{
"id": 3057,
"logprob": -1.4589844,
"id": 624,
"logprob": -0.021987915,
"special": false,
"text": "Test"
"text": " =="
},
{
"id": 2009,
"logprob": -0.15344238,
"id": 364,
"logprob": -0.39208984,
"special": false,
"text": " request"
"text": " '"
},
{
"id": 13,
"logprob": -0.79052734,
"id": 3019,
"logprob": -0.10638428,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22937012,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007041931,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.022140503,
"special": false,
"text": "\n"
"text": "POST"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
"generated_text": "\n \"\"\"\n if request.method == 'POST"
},
{
"details": {
@ -184,87 +174,82 @@
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"id": 2323,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.609375,
"id": 1715,
"logprob": -11.34375,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3261719,
"id": 198,
"logprob": -2.5742188,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8730469,
"id": 262,
"logprob": -1.6220703,
"special": false,
"text": "Test"
"text": " "
},
{
"id": 2009,
"logprob": -1.2587891,
"id": 3270,
"logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.6894531,
"id": 13204,
"logprob": -0.07672119,
"special": false,
"text": "\n"
"text": ".method"
},
{
"id": 3057,
"logprob": -1.46875,
"id": 624,
"logprob": -0.021987915,
"special": false,
"text": "Test"
"text": " =="
},
{
"id": 2009,
"logprob": -0.1541748,
"id": 364,
"logprob": -0.39208984,
"special": false,
"text": " request"
"text": " '"
},
{
"id": 13,
"logprob": -0.80322266,
"id": 3019,
"logprob": -0.10638428,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22912598,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0070495605,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021606445,
"special": false,
"text": "\n"
"text": "POST"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
"generated_text": "\n \"\"\"\n if request.method == 'POST"
},
{
"details": {
@ -273,86 +258,81 @@
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"id": 2323,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6015625,
"id": 1715,
"logprob": -11.34375,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3320312,
"id": 198,
"logprob": -2.5742188,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.875,
"id": 262,
"logprob": -1.6220703,
"special": false,
"text": "Test"
"text": " "
},
{
"id": 2009,
"logprob": -1.2646484,
"id": 3270,
"logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.6884766,
"id": 13204,
"logprob": -0.07672119,
"special": false,
"text": "\n"
"text": ".method"
},
{
"id": 3057,
"logprob": -1.4589844,
"id": 624,
"logprob": -0.021987915,
"special": false,
"text": "Test"
"text": " =="
},
{
"id": 2009,
"logprob": -0.15185547,
"id": 364,
"logprob": -0.39208984,
"special": false,
"text": " request"
"text": " '"
},
{
"id": 13,
"logprob": -0.79833984,
"id": 3019,
"logprob": -0.10638428,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22827148,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.006996155,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021560669,
"special": false,
"text": "\n"
"text": "POST"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
"generated_text": "\n \"\"\"\n if request.method == 'POST"
}
]

View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.8359375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6171875,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3417969,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8730469,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2626953,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7060547,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4482422,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15246582,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.796875,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22766113,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007045746,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021759033,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
}

View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.7890625,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.625,
"text": "request"
}
],
"seed": 0,
"tokens": [
{
"id": 29899,
"logprob": -1.4980469,
"special": false,
"text": "-"
},
{
"id": 1454,
"logprob": -0.19433594,
"special": false,
"text": "for"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 9342,
"logprob": 0.0,
"special": false,
"text": "comment"
},
{
"id": 29901,
"logprob": 0.0,
"special": false,
"text": ":"
},
{
"id": 396,
"logprob": -0.27392578,
"special": false,
"text": " #"
},
{
"id": 29906,
"logprob": -0.49389648,
"special": false,
"text": "2"
},
{
"id": 29900,
"logprob": -0.81103516,
"special": false,
"text": "0"
},
{
"id": 29896,
"logprob": 0.0,
"special": false,
"text": "1"
},
{
"id": 29955,
"logprob": -1.0800781,
"special": false,
"text": "7"
}
],
"top_tokens": null
},
"generated_text": "Test request-for-comment: #2017"
}

View File

@ -0,0 +1,358 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.8828125,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.5859375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3359375,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8623047,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2451172,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.6923828,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4492188,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15197754,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.8022461,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22583008,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007095337,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021652222,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.796875,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.625,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3476562,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8789062,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2734375,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.703125,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4677734,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15454102,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.7973633,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.23278809,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.006980896,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.022033691,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.9296875,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.5703125,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3203125,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8486328,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2480469,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7060547,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4511719,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.1529541,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.81396484,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22180176,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007133484,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021835327,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6171875,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3261719,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8691406,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2597656,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7070312,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4550781,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.1538086,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.79345703,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22924805,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0070266724,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021942139,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
}
]

View File

@ -3,7 +3,9 @@ import pytest
@pytest.fixture(scope="module")
def flash_llama_gptq_handle(launcher):
with launcher("huggingface/llama-7b-gptq", num_shard=2, quantize="gptq") as handle:
with launcher(
"astronomer/Llama-3-8B-Instruct-GPTQ-4-Bit", num_shard=2, quantize="gptq"
) as handle:
yield handle