Adding GPTQ integration tests.

2023-06-19 12:14:17 +00:00 · 2023-06-19 12:14:17 +00:00 · dca0fe2585
parent 16d0fb04ae
commit dca0fe2585
5 changed files with 675 additions and 2 deletions
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -232,9 +232,9 @@ def launcher(event_loop):
        if num_shard is not None:
            args.extend(["--num-shard", str(num_shard)])
-        if quantize:
+        if quantize is not None:
            args.append("--quantize")
-            args.append("bitsandbytes")
+            args.append(quantize)
        if trust_remote_code:
            args.append("--trust-remote-code")
--- a/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq.json
+++ b/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq.json
@@ -0,0 +1,103 @@
 {
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      {
        "id": 1,
        "logprob": null,
        "text": "<s>"
      },
      {
        "id": 1724,
        "logprob": -9.953125,
        "text": "What"
      },
      {
        "id": 338,
        "logprob": -1.4121094,
        "text": "is"
      },
      {
        "id": 6483,
        "logprob": -9.9765625,
        "text": "deep"
      },
      {
        "id": 6509,
        "logprob": -1.6767578,
        "text": "learning"
      },
      {
        "id": 1577,
        "logprob": -4.5976562,
        "text": "?"
      }
    ],
    "seed": null,
    "tokens": [
      {
        "id": 13,
        "logprob": -0.21813965,
        "special": false,
        "text": "\n"
      },
      {
        "id": 2772,
        "logprob": -1.4130859,
        "special": false,
        "text": "De"
      },
      {
        "id": 1022,
        "logprob": -0.0028419495,
        "special": false,
        "text": "ep"
      },
      {
        "id": 6509,
        "logprob": -0.3244629,
        "special": false,
        "text": " learning"
      },
      {
        "id": 338,
        "logprob": -0.25439453,
        "special": false,
        "text": " is"
      },
      {
        "id": 263,
        "logprob": -0.43774414,
        "special": false,
        "text": " a"
      },
      {
        "id": 4933,
        "logprob": -1.8105469,
        "special": false,
        "text": " machine"
      },
      {
        "id": 6509,
        "logprob": -0.07116699,
        "special": false,
        "text": " learning"
      },
      {
        "id": 11043,
        "logprob": -0.87158203,
        "special": false,
        "text": " technique"
      },
      {
        "id": 393,
        "logprob": -0.91015625,
        "special": false,
        "text": " that"
      }
    ]
  },
  "generated_text": "\nDeep learning is a machine learning technique that"
 }
--- a/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
+++ b/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
@@ -0,0 +1,98 @@
 {
  "details": {
    "best_of_sequences": null,
    "finish_reason": "length",
    "generated_tokens": 10,
    "prefill": [
      {
        "id": 1,
        "logprob": null,
        "text": "<s>"
      },
      {
        "id": 338,
        "logprob": -10.8046875,
        "text": "is"
      },
      {
        "id": 6483,
        "logprob": -12.6640625,
        "text": "deep"
      },
      {
        "id": 6509,
        "logprob": -3.3398438,
        "text": "learning"
      },
      {
        "id": 1577,
        "logprob": -8.3828125,
        "text": "?"
      }
    ],
    "seed": 0,
    "tokens": [
      {
        "id": 13,
        "logprob": 0.0,
        "special": false,
        "text": "\n"
      },
      {
        "id": 4013,
        "logprob": -2.6992188,
        "special": false,
        "text": "This"
      },
      {
        "id": 1139,
        "logprob": -0.35668945,
        "special": false,
        "text": " question"
      },
      {
        "id": 756,
        "logprob": -0.08251953,
        "special": false,
        "text": " has"
      },
      {
        "id": 1063,
        "logprob": -0.39697266,
        "special": false,
        "text": " been"
      },
      {
        "id": 4433,
        "logprob": 0.0,
        "special": false,
        "text": " asked"
      },
      {
        "id": 1784,
        "logprob": -0.9248047,
        "special": false,
        "text": " many"
      },
      {
        "id": 3064,
        "logprob": 0.0,
        "special": false,
        "text": " times"
      },
      {
        "id": 1434,
        "logprob": -0.90625,
        "special": false,
        "text": " before"
      },
      {
        "id": 29892,
        "logprob": -0.19580078,
        "special": false,
        "text": ","
      }
    ]
  },
  "generated_text": "What is deep learning ?\nThis question has been asked many times before,"
 }
--- a/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_load.json
+++ b/integration-tests/models/snapshots/test_flash_llama_gptq/test_flash_llama_gptq_load.json
@@ -0,0 +1,414 @@
 [
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1724,
          "logprob": -9.953125,
          "text": "What"
        },
        {
          "id": 338,
          "logprob": -1.4121094,
          "text": "is"
        },
        {
          "id": 6483,
          "logprob": -9.9765625,
          "text": "deep"
        },
        {
          "id": 6509,
          "logprob": -1.6767578,
          "text": "learning"
        },
        {
          "id": 1577,
          "logprob": -4.5976562,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.21813965,
          "special": false,
          "text": "\n"
        },
        {
          "id": 2772,
          "logprob": -1.4189453,
          "special": false,
          "text": "De"
        },
        {
          "id": 1022,
          "logprob": -0.0028419495,
          "special": false,
          "text": "ep"
        },
        {
          "id": 6509,
          "logprob": -0.3244629,
          "special": false,
          "text": " learning"
        },
        {
          "id": 338,
          "logprob": -0.25439453,
          "special": false,
          "text": " is"
        },
        {
          "id": 263,
          "logprob": -0.4375,
          "special": false,
          "text": " a"
        },
        {
          "id": 4933,
          "logprob": -1.8105469,
          "special": false,
          "text": " machine"
        },
        {
          "id": 6509,
          "logprob": -0.07116699,
          "special": false,
          "text": " learning"
        },
        {
          "id": 11043,
          "logprob": -0.87158203,
          "special": false,
          "text": " technique"
        },
        {
          "id": 393,
          "logprob": -0.91015625,
          "special": false,
          "text": " that"
        }
      ]
    },
    "generated_text": "\nDeep learning is a machine learning technique that"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1724,
          "logprob": -9.953125,
          "text": "What"
        },
        {
          "id": 338,
          "logprob": -1.4121094,
          "text": "is"
        },
        {
          "id": 6483,
          "logprob": -9.9765625,
          "text": "deep"
        },
        {
          "id": 6509,
          "logprob": -1.6767578,
          "text": "learning"
        },
        {
          "id": 1577,
          "logprob": -4.5976562,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.21813965,
          "special": false,
          "text": "\n"
        },
        {
          "id": 2772,
          "logprob": -1.4189453,
          "special": false,
          "text": "De"
        },
        {
          "id": 1022,
          "logprob": -0.0028419495,
          "special": false,
          "text": "ep"
        },
        {
          "id": 6509,
          "logprob": -0.3244629,
          "special": false,
          "text": " learning"
        },
        {
          "id": 338,
          "logprob": -0.25439453,
          "special": false,
          "text": " is"
        },
        {
          "id": 263,
          "logprob": -0.4375,
          "special": false,
          "text": " a"
        },
        {
          "id": 4933,
          "logprob": -1.8105469,
          "special": false,
          "text": " machine"
        },
        {
          "id": 6509,
          "logprob": -0.07116699,
          "special": false,
          "text": " learning"
        },
        {
          "id": 11043,
          "logprob": -0.87158203,
          "special": false,
          "text": " technique"
        },
        {
          "id": 393,
          "logprob": -0.91015625,
          "special": false,
          "text": " that"
        }
      ]
    },
    "generated_text": "\nDeep learning is a machine learning technique that"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1724,
          "logprob": -9.953125,
          "text": "What"
        },
        {
          "id": 338,
          "logprob": -1.4121094,
          "text": "is"
        },
        {
          "id": 6483,
          "logprob": -9.9765625,
          "text": "deep"
        },
        {
          "id": 6509,
          "logprob": -1.6767578,
          "text": "learning"
        },
        {
          "id": 1577,
          "logprob": -4.5976562,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.21813965,
          "special": false,
          "text": "\n"
        },
        {
          "id": 2772,
          "logprob": -1.4189453,
          "special": false,
          "text": "De"
        },
        {
          "id": 1022,
          "logprob": -0.0028419495,
          "special": false,
          "text": "ep"
        },
        {
          "id": 6509,
          "logprob": -0.3244629,
          "special": false,
          "text": " learning"
        },
        {
          "id": 338,
          "logprob": -0.25439453,
          "special": false,
          "text": " is"
        },
        {
          "id": 263,
          "logprob": -0.4375,
          "special": false,
          "text": " a"
        },
        {
          "id": 4933,
          "logprob": -1.8105469,
          "special": false,
          "text": " machine"
        },
        {
          "id": 6509,
          "logprob": -0.07116699,
          "special": false,
          "text": " learning"
        },
        {
          "id": 11043,
          "logprob": -0.87158203,
          "special": false,
          "text": " technique"
        },
        {
          "id": 393,
          "logprob": -0.91015625,
          "special": false,
          "text": " that"
        }
      ]
    },
    "generated_text": "\nDeep learning is a machine learning technique that"
  },
  {
    "details": {
      "best_of_sequences": null,
      "finish_reason": "length",
      "generated_tokens": 10,
      "prefill": [
        {
          "id": 1,
          "logprob": null,
          "text": "<s>"
        },
        {
          "id": 1724,
          "logprob": -9.953125,
          "text": "What"
        },
        {
          "id": 338,
          "logprob": -1.4121094,
          "text": "is"
        },
        {
          "id": 6483,
          "logprob": -9.9765625,
          "text": "deep"
        },
        {
          "id": 6509,
          "logprob": -1.6767578,
          "text": "learning"
        },
        {
          "id": 1577,
          "logprob": -4.5976562,
          "text": "?"
        }
      ],
      "seed": null,
      "tokens": [
        {
          "id": 13,
          "logprob": -0.21813965,
          "special": false,
          "text": "\n"
        },
        {
          "id": 2772,
          "logprob": -1.4189453,
          "special": false,
          "text": "De"
        },
        {
          "id": 1022,
          "logprob": -0.0028419495,
          "special": false,
          "text": "ep"
        },
        {
          "id": 6509,
          "logprob": -0.3244629,
          "special": false,
          "text": " learning"
        },
        {
          "id": 338,
          "logprob": -0.25439453,
          "special": false,
          "text": " is"
        },
        {
          "id": 263,
          "logprob": -0.4375,
          "special": false,
          "text": " a"
        },
        {
          "id": 4933,
          "logprob": -1.8105469,
          "special": false,
          "text": " machine"
        },
        {
          "id": 6509,
          "logprob": -0.07116699,
          "special": false,
          "text": " learning"
        },
        {
          "id": 11043,
          "logprob": -0.87158203,
          "special": false,
          "text": " technique"
        },
        {
          "id": 393,
          "logprob": -0.91015625,
          "special": false,
          "text": " that"
        }
      ]
    },
    "generated_text": "\nDeep learning is a machine learning technique that"
  }
 ]
--- a/integration-tests/models/test_flash_llama_gptq.py
+++ b/integration-tests/models/test_flash_llama_gptq.py
@@ -0,0 +1,58 @@
 import pytest
@pytest.fixture(scope="module")
def flash_llama_gptq_handle(launcher):
    """Yield a handle to a launched GPTQ Llama server, shared per module.

    The ``launcher`` fixture is called for ``huggingface/llama-7b-gptq``
    with ``num_shard=2`` and ``quantize="gptq"``; the object it returns is
    used as a context manager and the value it produces is yielded to the
    dependent fixtures and tests.
    """
    gptq_server = launcher("huggingface/llama-7b-gptq", num_shard=2, quantize="gptq")
    with gptq_server as running:
        yield running
@pytest.fixture(scope="module")
async def flash_llama_gptq(flash_llama_gptq_handle):
    """Return the server's client once its health check has completed.

    Awaits ``health(300)`` on the launched handle before handing out
    ``handle.client``.
    """
    server = flash_llama_gptq_handle
    # NOTE(review): 300 is presumably a timeout in seconds — confirm against
    # the handle's ``health`` implementation in conftest.py.
    await server.health(300)
    return server.client
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq(flash_llama_gptq, response_snapshot):
    """Generate 10 new tokens for a fixed prompt and compare with the snapshot.

    Only ``max_new_tokens`` and ``decoder_input_details`` are passed; every
    other generation option is left at the client's default.
    """
    prompt = "What is deep learning ?"
    result = await flash_llama_gptq.generate(
        prompt,
        max_new_tokens=10,
        decoder_input_details=True,
    )

    assert result.details.generated_tokens == 10
    assert result == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot):
    """Generate with every sampling option set and compare with the snapshot.

    The request fixes ``seed=0`` and asks for 10 new tokens; the response must
    report exactly 10 generated tokens and equal the stored snapshot.
    """
    # Gathered in one mapping so the full option set is visible at a glance;
    # the resulting call is the same keyword-argument call as spelling them out.
    generation_options = {
        "max_new_tokens": 10,
        "repetition_penalty": 1.2,
        "return_full_text": True,
        "stop_sequences": ["test"],
        "temperature": 0.5,
        "top_p": 0.9,
        "top_k": 10,
        "truncate": 5,
        "typical_p": 0.9,
        "watermark": True,
        "decoder_input_details": True,
        "seed": 0,
    }
    result = await flash_llama_gptq.generate(
        "What is deep learning ?", **generation_options
    )

    assert result.details.generated_tokens == 10
    assert result == response_snapshot
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_llama_gptq_load(flash_llama_gptq, generate_load, response_snapshot):
    """Issue the same prompt four times and require identical generations.

    ``generate_load`` is called with ``n=4``; the test checks that four
    responses come back, that they all carry the same ``generated_text`` as
    the first one, and that the whole list equals the stored snapshot.
    """
    prompt = "What is deep learning ?"
    batch = await generate_load(flash_llama_gptq, prompt, max_new_tokens=10, n=4)

    assert len(batch) == 4
    reference_text = batch[0].generated_text
    assert all(item.generated_text == reference_text for item in batch)
    assert batch == response_snapshot