Add tests for Mixtral (#2520)

Disable by default because CI runners do not have enough GPUs.
Daniël de Kok 2024-09-16 12:39:18 +02:00 committed by GitHub
parent 9cca3e0b03
commit 7774655297
4 changed files with 746 additions and 0 deletions
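
The skip markers in the test file at the end of this diff are unconditional. As a minimal sketch, the same constraint could instead be expressed as a conditional skip, assuming torch is importable in the test environment (illustrative only, not part of this commit):

import pytest
import torch

# Hypothetical alternative to the unconditional skip used in this commit:
# only run the Mixtral tests when at least 8 GPUs are visible, since the
# fixture below launches the model with num_shard=8.
requires_8_gpus = pytest.mark.skipif(
    torch.cuda.device_count() < 8,
    reason="Mixtral-8x7B integration tests need 8 GPU shards",
)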

@@ -0,0 +1,114 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1824,
"logprob": -6.1445312,
"text": "What"
},
{
"id": 349,
"logprob": -1.4648438,
"text": "is"
},
{
"id": 21135,
"logprob": -13.6875,
"text": "gradient"
},
{
"id": 24871,
"logprob": -1.6005859,
"text": "descent"
},
{
"id": 28804,
"logprob": -0.39526367,
"text": "?"
},
{
"id": 13,
"logprob": -0.640625,
"text": "\n"
},
{
"id": 13,
"logprob": -0.18774414,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 20910,
"logprob": -0.96484375,
"special": false,
"text": "Grad"
},
{
"id": 722,
"logprob": -0.003168106,
"special": false,
"text": "ient"
},
{
"id": 24871,
"logprob": -0.16540527,
"special": false,
"text": " descent"
},
{
"id": 349,
"logprob": -0.08886719,
"special": false,
"text": " is"
},
{
"id": 396,
"logprob": -0.75878906,
"special": false,
"text": " an"
},
{
"id": 18586,
"logprob": -0.5703125,
"special": false,
"text": " optimization"
},
{
"id": 9464,
"logprob": -0.11242676,
"special": false,
"text": " algorithm"
},
{
"id": 1307,
"logprob": -0.7939453,
"special": false,
"text": " used"
},
{
"id": 298,
"logprob": -0.17102051,
"special": false,
"text": " to"
},
{
"id": 26518,
"logprob": -0.34326172,
"special": false,
"text": " minimize"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is an optimization algorithm used to minimize"
}
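
This first snapshot covers the greedy request: decoder_input_details=True makes the response echo the prompt tokens with their logprobs in "prefill", while "tokens" holds the ten generated tokens (no sampling parameters are set, so "seed" is null). The joint log-probability of the completion can be read off by summing the per-token logprobs; a small illustrative script, assuming the snapshot above is saved as mixtral_greedy.json (the filename is a placeholder, not the path used by the test suite):

import json

with open("mixtral_greedy.json") as f:  # placeholder name for the snapshot above
    snapshot = json.load(f)

# Sum the logprobs of the ten generated tokens in the snapshot above.
logprob_sum = sum(t["logprob"] for t in snapshot["details"]["tokens"])
print(f"{snapshot['generated_text']!r}: total logprob = {logprob_sum:.2f}")  # about -3.97 here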

@@ -0,0 +1,99 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 24871,
"logprob": -17.234375,
"text": "descent"
},
{
"id": 28804,
"logprob": -7.4335938,
"text": "?"
},
{
"id": 13,
"logprob": -0.8017578,
"text": "\n"
},
{
"id": 13,
"logprob": -0.32958984,
"text": "\n"
}
],
"seed": 0,
"tokens": [
{
"id": 1313,
"logprob": -2.3613281,
"special": false,
"text": "It"
},
{
"id": 3969,
"logprob": -0.7285156,
"special": false,
"text": " seems"
},
{
"id": 298,
"logprob": -1.3466797,
"special": false,
"text": " to"
},
{
"id": 528,
"logprob": 0.0,
"special": false,
"text": " me"
},
{
"id": 28725,
"logprob": -1.6757812,
"special": false,
"text": ","
},
{
"id": 369,
"logprob": -0.06585693,
"special": false,
"text": " that"
},
{
"id": 513,
"logprob": -1.1269531,
"special": false,
"text": " if"
},
{
"id": 368,
"logprob": 0.0,
"special": false,
"text": " you"
},
{
"id": 28742,
"logprob": -2.4921875,
"special": false,
"text": "'"
},
{
"id": 267,
"logprob": 0.0,
"special": false,
"text": "re"
}
],
"top_tokens": null
},
"generated_text": "What is gradient descent?\n\nIt seems to me, that if you're"
}
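
Two details of this second snapshot follow directly from the request parameters in test_flash_mixtral_all_params below: truncate=5 leaves only five prefill tokens (the prompt is cut down to "<s>", "descent", "?", and two newlines), and return_full_text=True echoes the original prompt in front of the ten sampled tokens. A minimal check, again with a placeholder filename:

import json

with open("mixtral_all_params.json") as f:  # placeholder name for the snapshot above
    snapshot = json.load(f)

# truncate=5 -> only five prompt tokens survive in the prefill.
assert len(snapshot["details"]["prefill"]) == 5

# return_full_text=True -> the prompt precedes the ten newly sampled tokens.
assert snapshot["generated_text"].startswith("What is gradient descent?\n\n")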

@@ -0,0 +1,458 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1824,
"logprob": -6.1445312,
"text": "What"
},
{
"id": 349,
"logprob": -1.4648438,
"text": "is"
},
{
"id": 21135,
"logprob": -13.6875,
"text": "gradient"
},
{
"id": 24871,
"logprob": -1.6005859,
"text": "descent"
},
{
"id": 28804,
"logprob": -0.39526367,
"text": "?"
},
{
"id": 13,
"logprob": -0.640625,
"text": "\n"
},
{
"id": 13,
"logprob": -0.18774414,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 20910,
"logprob": -0.96484375,
"special": false,
"text": "Grad"
},
{
"id": 722,
"logprob": -0.003168106,
"special": false,
"text": "ient"
},
{
"id": 24871,
"logprob": -0.16369629,
"special": false,
"text": " descent"
},
{
"id": 349,
"logprob": -0.0881958,
"special": false,
"text": " is"
},
{
"id": 396,
"logprob": -0.76708984,
"special": false,
"text": " an"
},
{
"id": 18586,
"logprob": -0.57373047,
"special": false,
"text": " optimization"
},
{
"id": 9464,
"logprob": -0.11291504,
"special": false,
"text": " algorithm"
},
{
"id": 1307,
"logprob": -0.79589844,
"special": false,
"text": " used"
},
{
"id": 298,
"logprob": -0.1694336,
"special": false,
"text": " to"
},
{
"id": 26518,
"logprob": -0.34350586,
"special": false,
"text": " minimize"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is an optimization algorithm used to minimize"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1824,
"logprob": -6.1445312,
"text": "What"
},
{
"id": 349,
"logprob": -1.4677734,
"text": "is"
},
{
"id": 21135,
"logprob": -13.6875,
"text": "gradient"
},
{
"id": 24871,
"logprob": -1.6015625,
"text": "descent"
},
{
"id": 28804,
"logprob": -0.39453125,
"text": "?"
},
{
"id": 13,
"logprob": -0.6435547,
"text": "\n"
},
{
"id": 13,
"logprob": -0.18713379,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 20910,
"logprob": -0.9628906,
"special": false,
"text": "Grad"
},
{
"id": 722,
"logprob": -0.0032176971,
"special": false,
"text": "ient"
},
{
"id": 24871,
"logprob": -0.16540527,
"special": false,
"text": " descent"
},
{
"id": 349,
"logprob": -0.08898926,
"special": false,
"text": " is"
},
{
"id": 396,
"logprob": -0.765625,
"special": false,
"text": " an"
},
{
"id": 18586,
"logprob": -0.5708008,
"special": false,
"text": " optimization"
},
{
"id": 9464,
"logprob": -0.11401367,
"special": false,
"text": " algorithm"
},
{
"id": 1307,
"logprob": -0.7963867,
"special": false,
"text": " used"
},
{
"id": 298,
"logprob": -0.17028809,
"special": false,
"text": " to"
},
{
"id": 26518,
"logprob": -0.34326172,
"special": false,
"text": " minimize"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is an optimization algorithm used to minimize"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1824,
"logprob": -6.140625,
"text": "What"
},
{
"id": 349,
"logprob": -1.4658203,
"text": "is"
},
{
"id": 21135,
"logprob": -13.6796875,
"text": "gradient"
},
{
"id": 24871,
"logprob": -1.5898438,
"text": "descent"
},
{
"id": 28804,
"logprob": -0.3955078,
"text": "?"
},
{
"id": 13,
"logprob": -0.64501953,
"text": "\n"
},
{
"id": 13,
"logprob": -0.18493652,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 20910,
"logprob": -0.9580078,
"special": false,
"text": "Grad"
},
{
"id": 722,
"logprob": -0.0032176971,
"special": false,
"text": "ient"
},
{
"id": 24871,
"logprob": -0.16552734,
"special": false,
"text": " descent"
},
{
"id": 349,
"logprob": -0.08874512,
"special": false,
"text": " is"
},
{
"id": 396,
"logprob": -0.75878906,
"special": false,
"text": " an"
},
{
"id": 18586,
"logprob": -0.5703125,
"special": false,
"text": " optimization"
},
{
"id": 9464,
"logprob": -0.11236572,
"special": false,
"text": " algorithm"
},
{
"id": 1307,
"logprob": -0.79541016,
"special": false,
"text": " used"
},
{
"id": 298,
"logprob": -0.17102051,
"special": false,
"text": " to"
},
{
"id": 26518,
"logprob": -0.34326172,
"special": false,
"text": " minimize"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is an optimization algorithm used to minimize"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1824,
"logprob": -6.1328125,
"text": "What"
},
{
"id": 349,
"logprob": -1.4658203,
"text": "is"
},
{
"id": 21135,
"logprob": -13.6796875,
"text": "gradient"
},
{
"id": 24871,
"logprob": -1.5947266,
"text": "descent"
},
{
"id": 28804,
"logprob": -0.39648438,
"text": "?"
},
{
"id": 13,
"logprob": -0.6464844,
"text": "\n"
},
{
"id": 13,
"logprob": -0.18688965,
"text": "\n"
}
],
"seed": null,
"tokens": [
{
"id": 20910,
"logprob": -0.9609375,
"special": false,
"text": "Grad"
},
{
"id": 722,
"logprob": -0.003168106,
"special": false,
"text": "ient"
},
{
"id": 24871,
"logprob": -0.16601562,
"special": false,
"text": " descent"
},
{
"id": 349,
"logprob": -0.088134766,
"special": false,
"text": " is"
},
{
"id": 396,
"logprob": -0.7597656,
"special": false,
"text": " an"
},
{
"id": 18586,
"logprob": -0.5708008,
"special": false,
"text": " optimization"
},
{
"id": 9464,
"logprob": -0.11291504,
"special": false,
"text": " algorithm"
},
{
"id": 1307,
"logprob": -0.7944336,
"special": false,
"text": " used"
},
{
"id": 298,
"logprob": -0.17102051,
"special": false,
"text": " to"
},
{
"id": 26518,
"logprob": -0.34399414,
"special": false,
"text": " minimize"
}
],
"top_tokens": null
},
"generated_text": "Gradient descent is an optimization algorithm used to minimize"
}
]
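
This load snapshot holds four responses to the same prompt generated as one batch (n=4 in the test below). All four decode to the same text, while individual logprobs drift slightly between them (for example -0.16540527 versus -0.16369629 for " descent"), as is common for batched GPU kernels. A hedged sketch of comparing them with a tolerance, again using a placeholder filename:

import json
import math

with open("mixtral_load.json") as f:  # placeholder name for the snapshot above
    responses = json.load(f)

# Same token ids everywhere; logprobs are allowed to differ by a loose epsilon.
reference = responses[0]["details"]["tokens"]
for response in responses[1:]:
    for ref_tok, tok in zip(reference, response["details"]["tokens"]):
        assert tok["id"] == ref_tok["id"]
        assert math.isclose(tok["logprob"], ref_tok["logprob"], abs_tol=1e-2)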

@@ -0,0 +1,75 @@
import pytest


@pytest.fixture(scope="module")
def flash_mixtral_handle(launcher):
    with launcher("mistralai/Mixtral-8x7B-v0.1", num_shard=8) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_mixtral(flash_mixtral_handle):
    await flash_mixtral_handle.health(300)
    return flash_mixtral_handle.client


@pytest.mark.skip(reason="requires > 4 shards")
@pytest.mark.asyncio
async def test_flash_mixtral(flash_mixtral, response_snapshot):
    response = await flash_mixtral.generate(
        "What is gradient descent?\n\n", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "Gradient descent is an optimization algorithm used to minimize"
    )
    assert response == response_snapshot


@pytest.mark.skip(reason="requires > 4 shards")
@pytest.mark.asyncio
async def test_flash_mixtral_all_params(flash_mixtral, response_snapshot):
    response = await flash_mixtral.generate(
        "What is gradient descent?\n\n",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )

    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
        == "What is gradient descent?\n\nIt seems to me, that if you're"
    )
    assert response == response_snapshot


@pytest.mark.skip(reason="requires > 4 shards")
@pytest.mark.asyncio
async def test_flash_mixtral_load(flash_mixtral, generate_load, response_snapshot):
    responses = await generate_load(
        flash_mixtral, "What is gradient descent?\n\n", max_new_tokens=10, n=4
    )

    assert len(responses) == 4
    assert responses[0].details.generated_tokens == 10
    assert (
        responses[0].generated_text
        == "Gradient descent is an optimization algorithm used to minimize"
    )
    assert all(
        [r.generated_text == responses[0].generated_text for r in responses]
    ), f"{[r.generated_text for r in responses]}"

    assert responses == response_snapshot
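
As noted in the commit message, all three tests carry an unconditional skip marker. On a machine with enough GPUs one could drop the markers and select just these tests by keyword; one way to do that from Python, assuming it is run from the directory that contains the integration tests (the layout is not shown in this diff):

import sys
import pytest

# Illustrative only: run the Mixtral tests by keyword once the skip
# markers have been removed.
sys.exit(pytest.main(["-k", "flash_mixtral", "-v"]))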