Add FP8 release test (#2261)

This commit is contained in:
Daniël de Kok 2024-07-20 12:26:06 +02:00 committed by GitHub
parent 11123a8e99
commit e5c1d6d611
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 598 additions and 0 deletions

View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 128000,
"logprob": null,
"text": "<|begin_of_text|>"
},
{
"id": 2323,
"logprob": -9.421875,
"text": "Test"
},
{
"id": 1715,
"logprob": -10.546875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 369,
"logprob": -2.1816406,
"special": false,
"text": " for"
},
{
"id": 279,
"logprob": -2.6992188,
"special": false,
"text": " the"
},
{
"id": 220,
"logprob": -3.6308594,
"special": false,
"text": " "
},
{
"id": 679,
"logprob": -1.7900391,
"special": false,
"text": "201"
},
{
"id": 24,
"logprob": -1.3554688,
"special": false,
"text": "9"
},
{
"id": 12,
"logprob": -2.0039062,
"special": false,
"text": "-"
},
{
"id": 2366,
"logprob": -0.4489746,
"special": false,
"text": "202"
},
{
"id": 15,
"logprob": -0.037109375,
"special": false,
"text": "0"
},
{
"id": 2978,
"logprob": -0.8100586,
"special": false,
"text": " school"
},
{
"id": 1060,
"logprob": -0.013015747,
"special": false,
"text": " year"
}
],
"top_tokens": null
},
"generated_text": " for the 2019-2020 school year"
}

View File

@ -0,0 +1,89 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 128000,
"logprob": null,
"text": "<|begin_of_text|>"
},
{
"id": 2323,
"logprob": -9.421875,
"text": "Test"
},
{
"id": 1715,
"logprob": -10.546875,
"text": " request"
}
],
"seed": 0,
"tokens": [
{
"id": 25,
"logprob": -0.8535156,
"special": false,
"text": ":"
},
{
"id": 2209,
"logprob": -2.4804688,
"special": false,
"text": " Is"
},
{
"id": 279,
"logprob": -0.7167969,
"special": false,
"text": " the"
},
{
"id": 734,
"logprob": -2.625,
"special": false,
"text": " function"
},
{
"id": 330,
"logprob": -0.35131836,
"special": false,
"text": " \""
},
{
"id": 4110,
"logprob": -2.4101562,
"special": false,
"text": "Create"
},
{
"id": 264,
"logprob": -0.23181152,
"special": false,
"text": " a"
},
{
"id": 502,
"logprob": -0.25512695,
"special": false,
"text": " new"
},
{
"id": 1052,
"logprob": -1.2792969,
"special": false,
"text": " file"
},
{
"id": 1,
"logprob": -1.2529297,
"special": false,
"text": "\""
}
],
"top_tokens": null
},
"generated_text": "Test request: Is the function \"Create a new file\""
}

View File

@ -0,0 +1,358 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 128000,
"logprob": null,
"text": "<|begin_of_text|>"
},
{
"id": 2323,
"logprob": -9.421875,
"text": "Test"
},
{
"id": 1715,
"logprob": -10.546875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 369,
"logprob": -2.1816406,
"special": false,
"text": " for"
},
{
"id": 279,
"logprob": -2.6992188,
"special": false,
"text": " the"
},
{
"id": 220,
"logprob": -3.6308594,
"special": false,
"text": " "
},
{
"id": 679,
"logprob": -1.7988281,
"special": false,
"text": "201"
},
{
"id": 24,
"logprob": -1.3535156,
"special": false,
"text": "9"
},
{
"id": 12,
"logprob": -2.0058594,
"special": false,
"text": "-"
},
{
"id": 2366,
"logprob": -0.45410156,
"special": false,
"text": "202"
},
{
"id": 15,
"logprob": -0.037109375,
"special": false,
"text": "0"
},
{
"id": 2978,
"logprob": -0.8095703,
"special": false,
"text": " school"
},
{
"id": 1060,
"logprob": -0.013053894,
"special": false,
"text": " year"
}
],
"top_tokens": null
},
"generated_text": " for the 2019-2020 school year"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 128000,
"logprob": null,
"text": "<|begin_of_text|>"
},
{
"id": 2323,
"logprob": -9.421875,
"text": "Test"
},
{
"id": 1715,
"logprob": -10.546875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 369,
"logprob": -2.1816406,
"special": false,
"text": " for"
},
{
"id": 279,
"logprob": -2.6992188,
"special": false,
"text": " the"
},
{
"id": 220,
"logprob": -3.6308594,
"special": false,
"text": " "
},
{
"id": 679,
"logprob": -1.7988281,
"special": false,
"text": "201"
},
{
"id": 24,
"logprob": -1.3535156,
"special": false,
"text": "9"
},
{
"id": 12,
"logprob": -2.0058594,
"special": false,
"text": "-"
},
{
"id": 2366,
"logprob": -0.45410156,
"special": false,
"text": "202"
},
{
"id": 15,
"logprob": -0.037109375,
"special": false,
"text": "0"
},
{
"id": 2978,
"logprob": -0.8095703,
"special": false,
"text": " school"
},
{
"id": 1060,
"logprob": -0.013053894,
"special": false,
"text": " year"
}
],
"top_tokens": null
},
"generated_text": " for the 2019-2020 school year"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 128000,
"logprob": null,
"text": "<|begin_of_text|>"
},
{
"id": 2323,
"logprob": -9.421875,
"text": "Test"
},
{
"id": 1715,
"logprob": -10.546875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 369,
"logprob": -2.1816406,
"special": false,
"text": " for"
},
{
"id": 279,
"logprob": -2.6992188,
"special": false,
"text": " the"
},
{
"id": 220,
"logprob": -3.6308594,
"special": false,
"text": " "
},
{
"id": 679,
"logprob": -1.7988281,
"special": false,
"text": "201"
},
{
"id": 24,
"logprob": -1.3535156,
"special": false,
"text": "9"
},
{
"id": 12,
"logprob": -2.0058594,
"special": false,
"text": "-"
},
{
"id": 2366,
"logprob": -0.45410156,
"special": false,
"text": "202"
},
{
"id": 15,
"logprob": -0.037109375,
"special": false,
"text": "0"
},
{
"id": 2978,
"logprob": -0.8095703,
"special": false,
"text": " school"
},
{
"id": 1060,
"logprob": -0.013053894,
"special": false,
"text": " year"
}
],
"top_tokens": null
},
"generated_text": " for the 2019-2020 school year"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 128000,
"logprob": null,
"text": "<|begin_of_text|>"
},
{
"id": 2323,
"logprob": -9.421875,
"text": "Test"
},
{
"id": 1715,
"logprob": -10.546875,
"text": " request"
}
],
"seed": null,
"tokens": [
{
"id": 369,
"logprob": -2.1816406,
"special": false,
"text": " for"
},
{
"id": 279,
"logprob": -2.6992188,
"special": false,
"text": " the"
},
{
"id": 220,
"logprob": -3.6308594,
"special": false,
"text": " "
},
{
"id": 679,
"logprob": -1.7988281,
"special": false,
"text": "201"
},
{
"id": 24,
"logprob": -1.3535156,
"special": false,
"text": "9"
},
{
"id": 12,
"logprob": -2.0058594,
"special": false,
"text": "-"
},
{
"id": 2366,
"logprob": -0.45410156,
"special": false,
"text": "202"
},
{
"id": 15,
"logprob": -0.037109375,
"special": false,
"text": "0"
},
{
"id": 2978,
"logprob": -0.8095703,
"special": false,
"text": " school"
},
{
"id": 1060,
"logprob": -0.013053894,
"special": false,
"text": " year"
}
],
"top_tokens": null
},
"generated_text": " for the 2019-2020 school year"
}
]

View File

@ -0,0 +1,62 @@
import pytest
@pytest.fixture(scope="module")
def flash_llama_fp8_handle(launcher):
    """Module-scoped fixture yielding the handle produced by ``launcher``.

    The launcher is called for ``meta-llama/Meta-Llama-3-8B`` with
    ``num_shard=2`` and ``quantize="fp8"``; its context stays open for as
    long as the fixture is alive.
    """
    launched = launcher("meta-llama/Meta-Llama-3-8B", num_shard=2, quantize="fp8")
    with launched as running_handle:
        yield running_handle
@pytest.fixture(scope="module")
async def flash_llama_fp8(flash_llama_fp8_handle):
    """Module-scoped fixture returning the handle's ``client``.

    The handle's ``health`` coroutine is awaited first with the argument
    ``300`` (presumably a timeout in seconds -- confirm against the handle
    implementation).
    """
    handle = flash_llama_fp8_handle
    await handle.health(300)
    client = handle.client
    return client
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_fp8(flash_llama_fp8, response_snapshot):
    """Generate 10 new tokens for "Test request" and compare with the snapshot."""
    generation_options = {"max_new_tokens": 10, "decoder_input_details": True}
    response = await flash_llama_fp8.generate("Test request", **generation_options)

    details = response.details
    assert details.generated_tokens == 10
    assert response == response_snapshot
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_fp8_all_params(flash_llama_fp8, response_snapshot):
    """Generate with every sampling parameter set and compare with the snapshot.

    The request fixes ``seed=0`` and asks for ``max_new_tokens=10`` with the
    full text returned (``return_full_text=True``).
    """
    response = await flash_llama_fp8.generate(
        "Test request",
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )

    # Same explicit token-count check as test_flash_llama_fp8; the recorded
    # snapshot for this request also reports 10 generated tokens.
    assert response.details.generated_tokens == 10
    assert response == response_snapshot
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_fp8_load(flash_llama_fp8, generate_load, response_snapshot):
    """Issue the same request through ``generate_load`` with ``n=4``.

    Checks that four responses come back, that they all carry the same
    ``generated_text``, and that the list matches the snapshot.
    """
    responses = await generate_load(
        flash_llama_fp8, "Test request", max_new_tokens=10, n=4
    )

    assert len(responses) == 4

    # Generator expression avoids building a throwaway list of booleans; the
    # message lists the texts so a divergence is readable in the failure output.
    generated_texts = [r.generated_text for r in responses]
    assert all(
        text == generated_texts[0] for text in generated_texts
    ), f"responses diverged: {generated_texts}"

    assert responses == response_snapshot