Add GPTQ integration tests.

This commit is contained in:
Ubuntu 2023-06-19 12:14:17 +00:00
parent 16d0fb04ae
commit dca0fe2585
5 changed files with 675 additions and 2 deletions

View File

@ -232,9 +232,9 @@ def launcher(event_loop):
if num_shard is not None:
args.extend(["--num-shard", str(num_shard)])
if quantize:
if quantize is not None:
args.append("--quantize")
args.append("bitsandbytes")
args.append(quantize)
if trust_remote_code:
args.append("--trust-remote-code")

View File

@ -0,0 +1,103 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -9.953125,
"text": "What"
},
{
"id": 338,
"logprob": -1.4121094,
"text": "is"
},
{
"id": 6483,
"logprob": -9.9765625,
"text": "deep"
},
{
"id": 6509,
"logprob": -1.6767578,
"text": "learning"
},
{
"id": 1577,
"logprob": -4.5976562,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.21813965,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -1.4130859,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.0028419495,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.3244629,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.25439453,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.43774414,
"special": false,
"text": " a"
},
{
"id": 4933,
"logprob": -1.8105469,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.07116699,
"special": false,
"text": " learning"
},
{
"id": 11043,
"logprob": -0.87158203,
"special": false,
"text": " technique"
},
{
"id": 393,
"logprob": -0.91015625,
"special": false,
"text": " that"
}
]
},
"generated_text": "\nDeep learning is a machine learning technique that"
}

View File

@ -0,0 +1,98 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 338,
"logprob": -10.8046875,
"text": "is"
},
{
"id": 6483,
"logprob": -12.6640625,
"text": "deep"
},
{
"id": 6509,
"logprob": -3.3398438,
"text": "learning"
},
{
"id": 1577,
"logprob": -8.3828125,
"text": "?"
}
],
"seed": 0,
"tokens": [
{
"id": 13,
"logprob": 0.0,
"special": false,
"text": "\n"
},
{
"id": 4013,
"logprob": -2.6992188,
"special": false,
"text": "This"
},
{
"id": 1139,
"logprob": -0.35668945,
"special": false,
"text": " question"
},
{
"id": 756,
"logprob": -0.08251953,
"special": false,
"text": " has"
},
{
"id": 1063,
"logprob": -0.39697266,
"special": false,
"text": " been"
},
{
"id": 4433,
"logprob": 0.0,
"special": false,
"text": " asked"
},
{
"id": 1784,
"logprob": -0.9248047,
"special": false,
"text": " many"
},
{
"id": 3064,
"logprob": 0.0,
"special": false,
"text": " times"
},
{
"id": 1434,
"logprob": -0.90625,
"special": false,
"text": " before"
},
{
"id": 29892,
"logprob": -0.19580078,
"special": false,
"text": ","
}
]
},
"generated_text": "What is deep learning ?\nThis question has been asked many times before,"
}

View File

@ -0,0 +1,414 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -9.953125,
"text": "What"
},
{
"id": 338,
"logprob": -1.4121094,
"text": "is"
},
{
"id": 6483,
"logprob": -9.9765625,
"text": "deep"
},
{
"id": 6509,
"logprob": -1.6767578,
"text": "learning"
},
{
"id": 1577,
"logprob": -4.5976562,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.21813965,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -1.4189453,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.0028419495,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.3244629,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.25439453,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.4375,
"special": false,
"text": " a"
},
{
"id": 4933,
"logprob": -1.8105469,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.07116699,
"special": false,
"text": " learning"
},
{
"id": 11043,
"logprob": -0.87158203,
"special": false,
"text": " technique"
},
{
"id": 393,
"logprob": -0.91015625,
"special": false,
"text": " that"
}
]
},
"generated_text": "\nDeep learning is a machine learning technique that"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -9.953125,
"text": "What"
},
{
"id": 338,
"logprob": -1.4121094,
"text": "is"
},
{
"id": 6483,
"logprob": -9.9765625,
"text": "deep"
},
{
"id": 6509,
"logprob": -1.6767578,
"text": "learning"
},
{
"id": 1577,
"logprob": -4.5976562,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.21813965,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -1.4189453,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.0028419495,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.3244629,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.25439453,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.4375,
"special": false,
"text": " a"
},
{
"id": 4933,
"logprob": -1.8105469,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.07116699,
"special": false,
"text": " learning"
},
{
"id": 11043,
"logprob": -0.87158203,
"special": false,
"text": " technique"
},
{
"id": 393,
"logprob": -0.91015625,
"special": false,
"text": " that"
}
]
},
"generated_text": "\nDeep learning is a machine learning technique that"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -9.953125,
"text": "What"
},
{
"id": 338,
"logprob": -1.4121094,
"text": "is"
},
{
"id": 6483,
"logprob": -9.9765625,
"text": "deep"
},
{
"id": 6509,
"logprob": -1.6767578,
"text": "learning"
},
{
"id": 1577,
"logprob": -4.5976562,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.21813965,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -1.4189453,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.0028419495,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.3244629,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.25439453,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.4375,
"special": false,
"text": " a"
},
{
"id": 4933,
"logprob": -1.8105469,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.07116699,
"special": false,
"text": " learning"
},
{
"id": 11043,
"logprob": -0.87158203,
"special": false,
"text": " technique"
},
{
"id": 393,
"logprob": -0.91015625,
"special": false,
"text": " that"
}
]
},
"generated_text": "\nDeep learning is a machine learning technique that"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 1,
"logprob": null,
"text": "<s>"
},
{
"id": 1724,
"logprob": -9.953125,
"text": "What"
},
{
"id": 338,
"logprob": -1.4121094,
"text": "is"
},
{
"id": 6483,
"logprob": -9.9765625,
"text": "deep"
},
{
"id": 6509,
"logprob": -1.6767578,
"text": "learning"
},
{
"id": 1577,
"logprob": -4.5976562,
"text": "?"
}
],
"seed": null,
"tokens": [
{
"id": 13,
"logprob": -0.21813965,
"special": false,
"text": "\n"
},
{
"id": 2772,
"logprob": -1.4189453,
"special": false,
"text": "De"
},
{
"id": 1022,
"logprob": -0.0028419495,
"special": false,
"text": "ep"
},
{
"id": 6509,
"logprob": -0.3244629,
"special": false,
"text": " learning"
},
{
"id": 338,
"logprob": -0.25439453,
"special": false,
"text": " is"
},
{
"id": 263,
"logprob": -0.4375,
"special": false,
"text": " a"
},
{
"id": 4933,
"logprob": -1.8105469,
"special": false,
"text": " machine"
},
{
"id": 6509,
"logprob": -0.07116699,
"special": false,
"text": " learning"
},
{
"id": 11043,
"logprob": -0.87158203,
"special": false,
"text": " technique"
},
{
"id": 393,
"logprob": -0.91015625,
"special": false,
"text": " that"
}
]
},
"generated_text": "\nDeep learning is a machine learning technique that"
}
]

View File

@ -0,0 +1,58 @@
import pytest
@pytest.fixture(scope="module")
def flash_llama_gptq_handle(launcher):
    """Launch a 2-shard, GPTQ-quantized llama-7b server once for this module."""
    with launcher(
        "huggingface/llama-7b-gptq", num_shard=2, quantize="gptq"
    ) as server:
        yield server
@pytest.fixture(scope="module")
async def flash_llama_gptq(flash_llama_gptq_handle):
    """Block until the launched server reports healthy (300s budget), then
    expose its client to the tests."""
    await flash_llama_gptq_handle.health(300)
    client = flash_llama_gptq_handle.client
    return client
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq(flash_llama_gptq, response_snapshot):
    """Greedy generation on the GPTQ model must reproduce the stored snapshot."""
    result = await flash_llama_gptq.generate(
        "What is deep learning ?",
        max_new_tokens=10,
        decoder_input_details=True,
    )

    assert result.details.generated_tokens == 10
    assert result == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot):
    """Sampling path with every generation knob enabled; seeded so the output
    is deterministic and comparable against the snapshot."""
    sampling_params = dict(
        max_new_tokens=10,
        repetition_penalty=1.2,
        return_full_text=True,
        stop_sequences=["test"],
        temperature=0.5,
        top_p=0.9,
        top_k=10,
        truncate=5,
        typical_p=0.9,
        watermark=True,
        decoder_input_details=True,
        seed=0,
    )
    result = await flash_llama_gptq.generate(
        "What is deep learning ?", **sampling_params
    )

    assert result.details.generated_tokens == 10
    assert result == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq_load(
    flash_llama_gptq, generate_load, response_snapshot
):
    """Fire 4 concurrent identical requests; all must agree with each other
    and with the stored snapshot."""
    responses = await generate_load(
        flash_llama_gptq,
        "What is deep learning ?",
        max_new_tokens=10,
        n=4,
    )

    assert len(responses) == 4
    first_text = responses[0].generated_text
    assert all(r.generated_text == first_text for r in responses)
    assert responses == response_snapshot