GPTQ CI improvements (#2151)

* Add more representative Llama GPTQ test

The Llama GPTQ test is updated to use a model with the commonly-used
quantizer config format and activation sorting. The old test is
kept around (but renamed) since it tests the format produced by
`text-generation-server quantize`.

* Add support for manually triggering a release build
This commit is contained in:
Daniël de Kok 2024-07-05 14:12:16 +02:00 committed by GitHub
parent b67d46336e
commit 67ef0649cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 805 additions and 283 deletions

View File

@ -11,6 +11,11 @@ on:
# - rocm # - rocm
# - intel # - intel
required: true required: true
release-tests:
description: "Run release integration tests"
required: true
default: false
type: boolean
jobs: jobs:
build-and-push: build-and-push:
@ -148,7 +153,7 @@ jobs:
runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"] runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"]
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest' if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
env: env:
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main') && '--release' || '' }} PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == 'true') && '--release' || '' }}
steps: steps:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v4 uses: actions/checkout@v4

View File

@ -20,7 +20,14 @@ on:
- "Dockerfile_amd" - "Dockerfile_amd"
- "Dockerfile_intel" - "Dockerfile_intel"
branches: branches:
- 'main' - "main"
workflow_dispatch:
inputs:
release-tests:
description: "Run release integration tests"
required: true
default: false
type: boolean
jobs: jobs:
build: build:
@ -33,4 +40,6 @@ jobs:
uses: ./.github/workflows/build.yaml # calls the one above ^ uses: ./.github/workflows/build.yaml # calls the one above ^
with: with:
hardware: ${{ matrix.hardware }} hardware: ${{ matrix.hardware }}
# https://github.com/actions/runner/issues/2206
release-tests: ${{ inputs.release-tests == true }}
secrets: inherit secrets: inherit

View File

@ -5,85 +5,80 @@
"generated_tokens": 10, "generated_tokens": 10,
"prefill": [ "prefill": [
{ {
"id": 1, "id": 2323,
"logprob": null, "logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.7890625,
"text": "Test" "text": "Test"
}, },
{ {
"id": 2009, "id": 1715,
"logprob": -9.625, "logprob": -11.34375,
"text": "request" "text": " request"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 198,
"logprob": -2.3359375, "logprob": -2.5742188,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 3057, "id": 262,
"logprob": -1.8779297, "logprob": -1.6230469,
"special": false, "special": false,
"text": "Test" "text": " "
}, },
{ {
"id": 2009, "id": 3270,
"logprob": -1.2744141, "logprob": -2.046875,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1425781,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.9238281,
"special": false, "special": false,
"text": " request" "text": " request"
}, },
{ {
"id": 13, "id": 13204,
"logprob": -1.6933594, "logprob": -0.076660156,
"special": false, "special": false,
"text": "\n" "text": ".method"
}, },
{ {
"id": 3057, "id": 624,
"logprob": -1.4648438, "logprob": -0.021987915,
"special": false, "special": false,
"text": "Test" "text": " =="
}, },
{ {
"id": 2009, "id": 364,
"logprob": -0.15600586, "logprob": -0.39208984,
"special": false, "special": false,
"text": " request" "text": " '"
}, },
{ {
"id": 13, "id": 3019,
"logprob": -0.8027344, "logprob": -0.10821533,
"special": false, "special": false,
"text": "\n" "text": "POST"
},
{
"id": 3057,
"logprob": -0.23022461,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0069885254,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.02218628,
"special": false,
"text": "\n"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\nTest request\nTest request\nTest request\n" "generated_text": "\n \"\"\"\n if request.method == 'POST"
} }

View File

@ -5,85 +5,80 @@
"generated_tokens": 10, "generated_tokens": 10,
"prefill": [ "prefill": [
{ {
"id": 1, "id": 2323,
"logprob": null, "logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test" "text": "Test"
}, },
{ {
"id": 2009, "id": 1715,
"logprob": -9.6015625, "logprob": -11.34375,
"text": "request" "text": " request"
} }
], ],
"seed": 0, "seed": 0,
"tokens": [ "tokens": [
{ {
"id": 29899, "id": 13,
"logprob": -1.5625, "logprob": -2.2539062,
"special": false, "special": false,
"text": "-" "text": "."
}, },
{ {
"id": 1454, "id": 578,
"logprob": -0.20410156, "logprob": -0.15563965,
"special": false, "special": false,
"text": "for" "text": " The"
}, },
{ {
"id": 29899, "id": 3622,
"logprob": -0.8203125,
"special": false,
"text": " server"
},
{
"id": 706,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": "-" "text": " has"
}, },
{ {
"id": 9342, "id": 539,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": "comment" "text": " not"
}, },
{ {
"id": 29901, "id": 3686,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": ":" "text": " yet"
}, },
{ {
"id": 396, "id": 3288,
"logprob": -0.27685547,
"special": false,
"text": " #"
},
{
"id": 29906,
"logprob": -0.4970703,
"special": false,
"text": "2"
},
{
"id": 29900,
"logprob": -0.80615234,
"special": false,
"text": "0"
},
{
"id": 29896,
"logprob": 0.0, "logprob": 0.0,
"special": false, "special": false,
"text": "1" "text": " sent"
}, },
{ {
"id": 29955, "id": 904,
"logprob": -1.0751953, "logprob": 0.0,
"special": false, "special": false,
"text": "7" "text": " any"
},
{
"id": 828,
"logprob": 0.0,
"special": false,
"text": " data"
},
{
"id": 382,
"logprob": -1.5517578,
"special": false,
"text": ".\n\n"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "Test request-for-comment: #2017" "generated_text": "Test request. The server has not yet sent any data.\n\n"
} }

View File

@ -6,87 +6,82 @@
"generated_tokens": 10, "generated_tokens": 10,
"prefill": [ "prefill": [
{ {
"id": 1, "id": 2323,
"logprob": null, "logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.828125,
"text": "Test" "text": "Test"
}, },
{ {
"id": 2009, "id": 1715,
"logprob": -9.609375, "logprob": -11.34375,
"text": "request" "text": " request"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 198,
"logprob": -2.3300781, "logprob": -2.5742188,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 3057, "id": 262,
"logprob": -1.8740234, "logprob": -1.6220703,
"special": false, "special": false,
"text": "Test" "text": " "
}, },
{ {
"id": 2009, "id": 3270,
"logprob": -1.2646484, "logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false, "special": false,
"text": " request" "text": " request"
}, },
{ {
"id": 13, "id": 13204,
"logprob": -1.7158203, "logprob": -0.07672119,
"special": false, "special": false,
"text": "\n" "text": ".method"
}, },
{ {
"id": 3057, "id": 624,
"logprob": -1.4667969, "logprob": -0.021987915,
"special": false, "special": false,
"text": "Test" "text": " =="
}, },
{ {
"id": 2009, "id": 364,
"logprob": -0.15344238, "logprob": -0.39208984,
"special": false, "special": false,
"text": " request" "text": " '"
}, },
{ {
"id": 13, "id": 3019,
"logprob": -0.81591797, "logprob": -0.10638428,
"special": false, "special": false,
"text": "\n" "text": "POST"
},
{
"id": 3057,
"logprob": -0.22973633,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007045746,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021957397,
"special": false,
"text": "\n"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\nTest request\nTest request\nTest request\n" "generated_text": "\n \"\"\"\n if request.method == 'POST"
}, },
{ {
"details": { "details": {
@ -95,87 +90,82 @@
"generated_tokens": 10, "generated_tokens": 10,
"prefill": [ "prefill": [
{ {
"id": 1, "id": 2323,
"logprob": null, "logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test" "text": "Test"
}, },
{ {
"id": 2009, "id": 1715,
"logprob": -9.59375, "logprob": -11.34375,
"text": "request" "text": " request"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 198,
"logprob": -2.3378906, "logprob": -2.5742188,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 3057, "id": 262,
"logprob": -1.8779297, "logprob": -1.6220703,
"special": false, "special": false,
"text": "Test" "text": " "
}, },
{ {
"id": 2009, "id": 3270,
"logprob": -1.2636719, "logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false, "special": false,
"text": " request" "text": " request"
}, },
{ {
"id": 13, "id": 13204,
"logprob": -1.6992188, "logprob": -0.07672119,
"special": false, "special": false,
"text": "\n" "text": ".method"
}, },
{ {
"id": 3057, "id": 624,
"logprob": -1.4589844, "logprob": -0.021987915,
"special": false, "special": false,
"text": "Test" "text": " =="
}, },
{ {
"id": 2009, "id": 364,
"logprob": -0.15344238, "logprob": -0.39208984,
"special": false, "special": false,
"text": " request" "text": " '"
}, },
{ {
"id": 13, "id": 3019,
"logprob": -0.79052734, "logprob": -0.10638428,
"special": false, "special": false,
"text": "\n" "text": "POST"
},
{
"id": 3057,
"logprob": -0.22937012,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007041931,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.022140503,
"special": false,
"text": "\n"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\nTest request\nTest request\nTest request\n" "generated_text": "\n \"\"\"\n if request.method == 'POST"
}, },
{ {
"details": { "details": {
@ -184,87 +174,82 @@
"generated_tokens": 10, "generated_tokens": 10,
"prefill": [ "prefill": [
{ {
"id": 1, "id": 2323,
"logprob": null, "logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test" "text": "Test"
}, },
{ {
"id": 2009, "id": 1715,
"logprob": -9.609375, "logprob": -11.34375,
"text": "request" "text": " request"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 198,
"logprob": -2.3261719, "logprob": -2.5742188,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 3057, "id": 262,
"logprob": -1.8730469, "logprob": -1.6220703,
"special": false, "special": false,
"text": "Test" "text": " "
}, },
{ {
"id": 2009, "id": 3270,
"logprob": -1.2587891, "logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false, "special": false,
"text": " request" "text": " request"
}, },
{ {
"id": 13, "id": 13204,
"logprob": -1.6894531, "logprob": -0.07672119,
"special": false, "special": false,
"text": "\n" "text": ".method"
}, },
{ {
"id": 3057, "id": 624,
"logprob": -1.46875, "logprob": -0.021987915,
"special": false, "special": false,
"text": "Test" "text": " =="
}, },
{ {
"id": 2009, "id": 364,
"logprob": -0.1541748, "logprob": -0.39208984,
"special": false, "special": false,
"text": " request" "text": " '"
}, },
{ {
"id": 13, "id": 3019,
"logprob": -0.80322266, "logprob": -0.10638428,
"special": false, "special": false,
"text": "\n" "text": "POST"
},
{
"id": 3057,
"logprob": -0.22912598,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0070495605,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021606445,
"special": false,
"text": "\n"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\nTest request\nTest request\nTest request\n" "generated_text": "\n \"\"\"\n if request.method == 'POST"
}, },
{ {
"details": { "details": {
@ -273,86 +258,81 @@
"generated_tokens": 10, "generated_tokens": 10,
"prefill": [ "prefill": [
{ {
"id": 1, "id": 2323,
"logprob": null, "logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test" "text": "Test"
}, },
{ {
"id": 2009, "id": 1715,
"logprob": -9.6015625, "logprob": -11.34375,
"text": "request" "text": " request"
} }
], ],
"seed": null, "seed": null,
"tokens": [ "tokens": [
{ {
"id": 13, "id": 198,
"logprob": -2.3320312, "logprob": -2.5742188,
"special": false, "special": false,
"text": "\n" "text": "\n"
}, },
{ {
"id": 3057, "id": 262,
"logprob": -1.875, "logprob": -1.6220703,
"special": false, "special": false,
"text": "Test" "text": " "
}, },
{ {
"id": 2009, "id": 3270,
"logprob": -1.2646484, "logprob": -2.0410156,
"special": false,
"text": " \"\"\"\n"
},
{
"id": 262,
"logprob": -0.015281677,
"special": false,
"text": " "
},
{
"id": 422,
"logprob": -2.1445312,
"special": false,
"text": " if"
},
{
"id": 1715,
"logprob": -0.92333984,
"special": false, "special": false,
"text": " request" "text": " request"
}, },
{ {
"id": 13, "id": 13204,
"logprob": -1.6884766, "logprob": -0.07672119,
"special": false, "special": false,
"text": "\n" "text": ".method"
}, },
{ {
"id": 3057, "id": 624,
"logprob": -1.4589844, "logprob": -0.021987915,
"special": false, "special": false,
"text": "Test" "text": " =="
}, },
{ {
"id": 2009, "id": 364,
"logprob": -0.15185547, "logprob": -0.39208984,
"special": false, "special": false,
"text": " request" "text": " '"
}, },
{ {
"id": 13, "id": 3019,
"logprob": -0.79833984, "logprob": -0.10638428,
"special": false, "special": false,
"text": "\n" "text": "POST"
},
{
"id": 3057,
"logprob": -0.22827148,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.006996155,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021560669,
"special": false,
"text": "\n"
} }
], ],
"top_tokens": null "top_tokens": null
}, },
"generated_text": "\nTest request\nTest request\nTest request\n" "generated_text": "\n \"\"\"\n if request.method == 'POST"
} }
] ]

View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.8359375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6171875,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3417969,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8730469,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2626953,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7060547,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4482422,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15246582,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.796875,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22766113,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007045746,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021759033,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
}

View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.7890625,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.625,
"text": "request"
}
],
"seed": 0,
"tokens": [
{
"id": 29899,
"logprob": -1.4980469,
"special": false,
"text": "-"
},
{
"id": 1454,
"logprob": -0.19433594,
"special": false,
"text": "for"
},
{
"id": 29899,
"logprob": 0.0,
"special": false,
"text": "-"
},
{
"id": 9342,
"logprob": 0.0,
"special": false,
"text": "comment"
},
{
"id": 29901,
"logprob": 0.0,
"special": false,
"text": ":"
},
{
"id": 396,
"logprob": -0.27392578,
"special": false,
"text": " #"
},
{
"id": 29906,
"logprob": -0.49389648,
"special": false,
"text": "2"
},
{
"id": 29900,
"logprob": -0.81103516,
"special": false,
"text": "0"
},
{
"id": 29896,
"logprob": 0.0,
"special": false,
"text": "1"
},
{
"id": 29955,
"logprob": -1.0800781,
"special": false,
"text": "7"
}
],
"top_tokens": null
},
"generated_text": "Test request-for-comment: #2017"
}

View File

@ -0,0 +1,358 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.8828125,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.5859375,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3359375,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8623047,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2451172,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.6923828,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4492188,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15197754,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.8022461,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22583008,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007095337,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021652222,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.796875,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.625,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3476562,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8789062,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2734375,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.703125,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4677734,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.15454102,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.7973633,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.23278809,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.006980896,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.022033691,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.9296875,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.5703125,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3203125,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8486328,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2480469,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7060547,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4511719,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.1529541,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.81396484,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22180176,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.007133484,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021835327,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 4321,
"logprob": -9.84375,
"text": "Test"
},
{
"id": 2009,
"logprob": -9.6171875,
"text": "request"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -2.3261719,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.8691406,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -1.2597656,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -1.7070312,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -1.4550781,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.1538086,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.79345703,
"special": false,
"text": "\n"
},
{
"id": 3057,
"logprob": -0.22924805,
"special": false,
"text": "Test"
},
{
"id": 2009,
"logprob": -0.0070266724,
"special": false,
"text": " request"
},
{
"id": 13,
"logprob": -0.021942139,
"special": false,
"text": "\n"
}
],
"top_tokens": null
},
"generated_text": "\nTest request\nTest request\nTest request\n"
}
]

View File

@ -3,7 +3,9 @@ import pytest
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def flash_llama_gptq_handle(launcher): def flash_llama_gptq_handle(launcher):
with launcher("huggingface/llama-7b-gptq", num_shard=2, quantize="gptq") as handle: with launcher(
"astronomer/Llama-3-8B-Instruct-GPTQ-4-Bit", num_shard=2, quantize="gptq"
) as handle:
yield handle yield handle