Add pytest release marker (#2114)
* Add pytest release marker: annotate a test with `@pytest.mark.release` and it only gets run with `pytest integration-tests --release`.
* Mark many models as `release` to speed up CI.
This commit is contained in:
parent
e563983d90
commit
fc9c3153e5
|
@ -156,6 +156,8 @@ jobs:
|
||||||
needs: build-and-push
|
needs: build-and-push
|
||||||
runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"]
|
runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"]
|
||||||
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
||||||
|
env:
|
||||||
|
PYTEST_FLAGS: ${{ github.ref == 'refs/heads/main' && '--release' || '' }}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
@ -180,4 +182,4 @@ jobs:
|
||||||
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
|
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
|
||||||
export HF_TOKEN=${{ secrets.HF_TOKEN }}
|
export HF_TOKEN=${{ secrets.HF_TOKEN }}
|
||||||
echo $DOCKER_IMAGE
|
echo $DOCKER_IMAGE
|
||||||
pytest -s -vv integration-tests
|
pytest -s -vv integration-tests ${PYTEST_FLAGS}
|
||||||
|
|
|
@ -37,6 +37,26 @@ DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data")
|
||||||
DOCKER_DEVICES = os.getenv("DOCKER_DEVICES")
|
DOCKER_DEVICES = os.getenv("DOCKER_DEVICES")
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_addoption(parser):
    """Register the ``--release`` command-line flag.

    The flag is a boolean switch (``store_true``) that defaults to ``False``.
    """
    option_settings = {
        "action": "store_true",
        "default": False,
        "help": "run release tests",
    }
    parser.addoption("--release", **option_settings)
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_configure(config):
    """Add the ``release`` marker description to the ``markers`` ini value."""
    marker_description = "release: mark test as a release-only test"
    config.addinivalue_line("markers", marker_description)
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_collection_modifyitems(config, items):
    """Skip tests marked ``release`` unless ``--release`` was given.

    Args:
        config: pytest config object; queried for the ``--release`` option.
        items: collected test items; every item carrying the ``release``
            marker gets a skip marker added in place.
    """
    if config.getoption("--release"):
        # --release given in cli: do not skip release tests
        return
    skip_release = pytest.mark.skip(reason="need --release option to run")
    for item in items:
        # Look up the actual marker instead of testing ``item.keywords``:
        # keywords also contain node names, so a file, class or directory
        # that happens to be named "release" would be skipped by accident.
        if item.get_closest_marker("release") is not None:
            item.add_marker(skip_release)
|
||||||
|
|
||||||
|
|
||||||
class ResponseComparator(JSONSnapshotExtension):
|
class ResponseComparator(JSONSnapshotExtension):
|
||||||
rtol = 0.2
|
rtol = 0.2
|
||||||
ignore_logprob = False
|
ignore_logprob = False
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def bloom_560(bloom_560_handle):
|
||||||
return bloom_560_handle.client
|
return bloom_560_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_bloom_560m(bloom_560, response_snapshot):
|
async def test_bloom_560m(bloom_560, response_snapshot):
|
||||||
response = await bloom_560.generate(
|
response = await bloom_560.generate(
|
||||||
|
@ -27,6 +28,7 @@ async def test_bloom_560m(bloom_560, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_bloom_560m_all_params(bloom_560, response_snapshot):
|
async def test_bloom_560m_all_params(bloom_560, response_snapshot):
|
||||||
response = await bloom_560.generate(
|
response = await bloom_560.generate(
|
||||||
|
@ -49,6 +51,7 @@ async def test_bloom_560m_all_params(bloom_560, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot):
|
async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
responses = await generate_load(
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def bloom_560m_sharded(bloom_560m_sharded_handle):
|
||||||
return bloom_560m_sharded_handle.client
|
return bloom_560m_sharded_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_bloom_560m_sharded(bloom_560m_sharded, response_snapshot):
|
async def test_bloom_560m_sharded(bloom_560m_sharded, response_snapshot):
|
||||||
response = await bloom_560m_sharded.generate(
|
response = await bloom_560m_sharded.generate(
|
||||||
|
@ -27,6 +28,7 @@ async def test_bloom_560m_sharded(bloom_560m_sharded, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_bloom_560m_sharded_load(
|
async def test_bloom_560m_sharded_load(
|
||||||
bloom_560m_sharded, generate_load, response_snapshot
|
bloom_560m_sharded, generate_load, response_snapshot
|
||||||
|
|
|
@ -26,6 +26,7 @@ async def flash_llama_completion(flash_llama_completion_handle):
|
||||||
# method for it. Instead, we use the `requests` library to make the HTTP request directly.
|
# method for it. Instead, we use the `requests` library to make the HTTP request directly.
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
def test_flash_llama_completion_single_prompt(
|
def test_flash_llama_completion_single_prompt(
|
||||||
flash_llama_completion, response_snapshot
|
flash_llama_completion, response_snapshot
|
||||||
):
|
):
|
||||||
|
@ -46,6 +47,7 @@ def test_flash_llama_completion_single_prompt(
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
def test_flash_llama_completion_many_prompts(flash_llama_completion, response_snapshot):
|
def test_flash_llama_completion_many_prompts(flash_llama_completion, response_snapshot):
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{flash_llama_completion.base_url}/v1/completions",
|
f"{flash_llama_completion.base_url}/v1/completions",
|
||||||
|
@ -68,6 +70,7 @@ def test_flash_llama_completion_many_prompts(flash_llama_completion, response_sn
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
async def test_flash_llama_completion_many_prompts_stream(
|
async def test_flash_llama_completion_many_prompts_stream(
|
||||||
flash_llama_completion, response_snapshot
|
flash_llama_completion, response_snapshot
|
||||||
):
|
):
|
||||||
|
|
|
@ -17,6 +17,7 @@ async def flash_llama_awq(flash_llama_awq_handle):
|
||||||
return flash_llama_awq_handle.client
|
return flash_llama_awq_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
|
async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
|
||||||
response = await flash_llama_awq.generate(
|
response = await flash_llama_awq.generate(
|
||||||
|
@ -31,6 +32,7 @@ async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
|
async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
|
||||||
response = await flash_llama_awq.generate(
|
response = await flash_llama_awq.generate(
|
||||||
|
@ -52,6 +54,7 @@ async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_llama_awq_load(flash_llama_awq, generate_load, response_snapshot):
|
async def test_flash_llama_awq_load(flash_llama_awq, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
responses = await generate_load(
|
||||||
|
|
|
@ -17,6 +17,7 @@ async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
|
||||||
return flash_llama_awq_handle_sharded.client
|
return flash_llama_awq_handle_sharded.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
|
async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
|
||||||
response = await flash_llama_awq_sharded.generate(
|
response = await flash_llama_awq_sharded.generate(
|
||||||
|
@ -31,6 +32,7 @@ async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapsho
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_llama_awq_load_sharded(
|
async def test_flash_llama_awq_load_sharded(
|
||||||
flash_llama_awq_sharded, generate_load, response_snapshot
|
flash_llama_awq_sharded, generate_load, response_snapshot
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_falcon(flash_falcon_handle):
|
||||||
return flash_falcon_handle.client
|
return flash_falcon_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_falcon(flash_falcon, response_snapshot):
|
async def test_flash_falcon(flash_falcon, response_snapshot):
|
||||||
|
@ -26,6 +27,7 @@ async def test_flash_falcon(flash_falcon, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_falcon_all_params(flash_falcon, response_snapshot):
|
async def test_flash_falcon_all_params(flash_falcon, response_snapshot):
|
||||||
|
@ -49,6 +51,7 @@ async def test_flash_falcon_all_params(flash_falcon, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_falcon_load(flash_falcon, generate_load, response_snapshot):
|
async def test_flash_falcon_load(flash_falcon, generate_load, response_snapshot):
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_gemma(flash_gemma_handle):
|
||||||
return flash_gemma_handle.client
|
return flash_gemma_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_gemma(flash_gemma, response_snapshot):
|
async def test_flash_gemma(flash_gemma, response_snapshot):
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_gemma(flash_gemma, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
|
async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
|
||||||
|
@ -47,6 +49,7 @@ async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_gemma_load(flash_gemma, generate_load, response_snapshot):
|
async def test_flash_gemma_load(flash_gemma, generate_load, response_snapshot):
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_gemma_gptq(flash_gemma_gptq_handle):
|
||||||
return flash_gemma_gptq_handle.client
|
return flash_gemma_gptq_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_gemma_gptq(flash_gemma_gptq, ignore_logprob_response_snapshot):
|
async def test_flash_gemma_gptq(flash_gemma_gptq, ignore_logprob_response_snapshot):
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_gemma_gptq(flash_gemma_gptq, ignore_logprob_response_snapsh
|
||||||
assert response == ignore_logprob_response_snapshot
|
assert response == ignore_logprob_response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_gemma_gptq_all_params(
|
async def test_flash_gemma_gptq_all_params(
|
||||||
|
@ -49,6 +51,7 @@ async def test_flash_gemma_gptq_all_params(
|
||||||
assert response == ignore_logprob_response_snapshot
|
assert response == ignore_logprob_response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_gemma_gptq_load(
|
async def test_flash_gemma_gptq_load(
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_gpt2(flash_gpt2_handle):
|
||||||
return flash_gpt2_handle.client
|
return flash_gpt2_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_gpt2(flash_gpt2, response_snapshot):
|
async def test_flash_gpt2(flash_gpt2, response_snapshot):
|
||||||
response = await flash_gpt2.generate(
|
response = await flash_gpt2.generate(
|
||||||
|
@ -25,6 +26,7 @@ async def test_flash_gpt2(flash_gpt2, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_gpt2_load(flash_gpt2, generate_load, response_snapshot):
|
async def test_flash_gpt2_load(flash_gpt2, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
responses = await generate_load(
|
||||||
|
|
|
@ -21,6 +21,7 @@ async def flash_llama_exl2(flash_llama_exl2_handle):
|
||||||
return flash_llama_exl2_handle.client
|
return flash_llama_exl2_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_exl2(flash_llama_exl2, ignore_logprob_response_snapshot):
|
async def test_flash_llama_exl2(flash_llama_exl2, ignore_logprob_response_snapshot):
|
||||||
|
@ -32,6 +33,7 @@ async def test_flash_llama_exl2(flash_llama_exl2, ignore_logprob_response_snapsh
|
||||||
assert response == ignore_logprob_response_snapshot
|
assert response == ignore_logprob_response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_exl2_all_params(
|
async def test_flash_llama_exl2_all_params(
|
||||||
|
@ -58,6 +60,7 @@ async def test_flash_llama_exl2_all_params(
|
||||||
assert response == ignore_logprob_response_snapshot
|
assert response == ignore_logprob_response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_exl2_load(
|
async def test_flash_llama_exl2_load(
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_llama_gptq(flash_llama_gptq_handle):
|
||||||
return flash_llama_gptq_handle.client
|
return flash_llama_gptq_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_gptq(flash_llama_gptq, response_snapshot):
|
async def test_flash_llama_gptq(flash_llama_gptq, response_snapshot):
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_llama_gptq(flash_llama_gptq, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot):
|
async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot):
|
||||||
|
@ -46,6 +48,7 @@ async def test_flash_llama_gptq_all_params(flash_llama_gptq, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_gptq_load(
|
async def test_flash_llama_gptq_load(
|
||||||
|
|
|
@ -15,6 +15,7 @@ async def flash_llama_gptq_marlin(flash_llama_gptq_marlin_handle):
|
||||||
return flash_llama_gptq_marlin_handle.client
|
return flash_llama_gptq_marlin_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_gptq_marlin(flash_llama_gptq_marlin, response_snapshot):
|
async def test_flash_llama_gptq_marlin(flash_llama_gptq_marlin, response_snapshot):
|
||||||
|
@ -26,6 +27,7 @@ async def test_flash_llama_gptq_marlin(flash_llama_gptq_marlin, response_snapsho
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_gptq_marlin_all_params(
|
async def test_flash_llama_gptq_marlin_all_params(
|
||||||
|
@ -50,6 +52,7 @@ async def test_flash_llama_gptq_marlin_all_params(
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_gptq_marlin_load(
|
async def test_flash_llama_gptq_marlin_load(
|
||||||
|
|
|
@ -15,6 +15,7 @@ async def flash_llama_marlin(flash_llama_marlin_handle):
|
||||||
return flash_llama_marlin_handle.client
|
return flash_llama_marlin_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_marlin(flash_llama_marlin, response_snapshot):
|
async def test_flash_llama_marlin(flash_llama_marlin, response_snapshot):
|
||||||
|
@ -26,6 +27,7 @@ async def test_flash_llama_marlin(flash_llama_marlin, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_marlin_all_params(flash_llama_marlin, response_snapshot):
|
async def test_flash_llama_marlin_all_params(flash_llama_marlin, response_snapshot):
|
||||||
|
@ -48,6 +50,7 @@ async def test_flash_llama_marlin_all_params(flash_llama_marlin, response_snapsh
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llama_marlin_load(
|
async def test_flash_llama_marlin_load(
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_neox(flash_neox_handle):
|
||||||
return flash_neox_handle.client
|
return flash_neox_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_neox(flash_neox, response_snapshot):
|
async def test_flash_neox(flash_neox, response_snapshot):
|
||||||
|
@ -26,6 +27,7 @@ async def test_flash_neox(flash_neox, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
|
async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_neox_sharded(flash_neox_sharded_handle):
|
||||||
return flash_neox_sharded_handle.client
|
return flash_neox_sharded_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_neox(flash_neox_sharded, response_snapshot):
|
async def test_flash_neox(flash_neox_sharded, response_snapshot):
|
||||||
response = await flash_neox_sharded.generate(
|
response = await flash_neox_sharded.generate(
|
||||||
|
@ -25,6 +26,7 @@ async def test_flash_neox(flash_neox_sharded, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_neox_load(flash_neox_sharded, generate_load, response_snapshot):
|
async def test_flash_neox_load(flash_neox_sharded, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
responses = await generate_load(
|
||||||
|
|
|
@ -34,6 +34,7 @@ def get_cow_beach():
|
||||||
return f"data:image/png;base64,{encoded_string.decode('utf-8')}"
|
return f"data:image/png;base64,{encoded_string.decode('utf-8')}"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_pali_gemma(flash_pali_gemma, response_snapshot):
|
async def test_flash_pali_gemma(flash_pali_gemma, response_snapshot):
|
||||||
|
@ -45,6 +46,7 @@ async def test_flash_pali_gemma(flash_pali_gemma, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_pali_gemma_two_images(flash_pali_gemma, response_snapshot):
|
async def test_flash_pali_gemma_two_images(flash_pali_gemma, response_snapshot):
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_phi(flash_phi_handle):
|
||||||
return flash_phi_handle.client
|
return flash_phi_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_phi(flash_phi, response_snapshot):
|
async def test_flash_phi(flash_phi, response_snapshot):
|
||||||
response = await flash_phi.generate(
|
response = await flash_phi.generate(
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_phi(flash_phi, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_phi_all_params(flash_phi, response_snapshot):
|
async def test_flash_phi_all_params(flash_phi, response_snapshot):
|
||||||
response = await flash_phi.generate(
|
response = await flash_phi.generate(
|
||||||
|
@ -47,6 +49,7 @@ async def test_flash_phi_all_params(flash_phi, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_phi_load(flash_phi, generate_load, response_snapshot):
|
async def test_flash_phi_load(flash_phi, generate_load, response_snapshot):
|
||||||
responses = await generate_load(flash_phi, "Test request", max_new_tokens=10, n=4)
|
responses = await generate_load(flash_phi, "Test request", max_new_tokens=10, n=4)
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_qwen2(flash_qwen2_handle):
|
||||||
return flash_qwen2_handle.client
|
return flash_qwen2_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_qwen2(flash_qwen2, response_snapshot):
|
async def test_flash_qwen2(flash_qwen2, response_snapshot):
|
||||||
response = await flash_qwen2.generate(
|
response = await flash_qwen2.generate(
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_qwen2(flash_qwen2, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_qwen2_all_params(flash_qwen2, response_snapshot):
|
async def test_flash_qwen2_all_params(flash_qwen2, response_snapshot):
|
||||||
response = await flash_qwen2.generate(
|
response = await flash_qwen2.generate(
|
||||||
|
@ -46,6 +48,7 @@ async def test_flash_qwen2_all_params(flash_qwen2, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_qwen2_load(flash_qwen2, generate_load, response_snapshot):
|
async def test_flash_qwen2_load(flash_qwen2, generate_load, response_snapshot):
|
||||||
responses = await generate_load(flash_qwen2, "Test request", max_new_tokens=10, n=4)
|
responses = await generate_load(flash_qwen2, "Test request", max_new_tokens=10, n=4)
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_santacoder(flash_santacoder_handle):
|
||||||
return flash_santacoder_handle.client
|
return flash_santacoder_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_santacoder(flash_santacoder, response_snapshot):
|
async def test_flash_santacoder(flash_santacoder, response_snapshot):
|
||||||
response = await flash_santacoder.generate(
|
response = await flash_santacoder.generate(
|
||||||
|
@ -23,6 +24,7 @@ async def test_flash_santacoder(flash_santacoder, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_santacoder_load(
|
async def test_flash_santacoder_load(
|
||||||
flash_santacoder, generate_load, response_snapshot
|
flash_santacoder, generate_load, response_snapshot
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_starcoder(flash_starcoder_handle):
|
||||||
return flash_starcoder_handle.client
|
return flash_starcoder_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_starcoder(flash_starcoder, response_snapshot):
|
async def test_flash_starcoder(flash_starcoder, response_snapshot):
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_starcoder(flash_starcoder, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_starcoder_default_params(flash_starcoder, response_snapshot):
|
async def test_flash_starcoder_default_params(flash_starcoder, response_snapshot):
|
||||||
|
@ -40,6 +42,7 @@ async def test_flash_starcoder_default_params(flash_starcoder, response_snapshot
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_starcoder_load(flash_starcoder, generate_load, response_snapshot):
|
async def test_flash_starcoder_load(flash_starcoder, generate_load, response_snapshot):
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_starcoder2(flash_starcoder2_handle):
|
||||||
return flash_starcoder2_handle.client
|
return flash_starcoder2_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_starcoder2(flash_starcoder2, response_snapshot):
|
async def test_flash_starcoder2(flash_starcoder2, response_snapshot):
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_starcoder2(flash_starcoder2, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_starcoder2_default_params(flash_starcoder2, response_snapshot):
|
async def test_flash_starcoder2_default_params(flash_starcoder2, response_snapshot):
|
||||||
|
@ -40,6 +42,7 @@ async def test_flash_starcoder2_default_params(flash_starcoder2, response_snapsh
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_starcoder2_load(
|
async def test_flash_starcoder2_load(
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def flash_starcoder_gptq(flash_starcoder_gptq_handle):
|
||||||
return flash_starcoder_gptq_handle.client
|
return flash_starcoder_gptq_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_starcoder_gptq(flash_starcoder_gptq, generous_response_snapshot):
|
async def test_flash_starcoder_gptq(flash_starcoder_gptq, generous_response_snapshot):
|
||||||
response = await flash_starcoder_gptq.generate(
|
response = await flash_starcoder_gptq.generate(
|
||||||
|
@ -24,6 +25,7 @@ async def test_flash_starcoder_gptq(flash_starcoder_gptq, generous_response_snap
|
||||||
assert response == generous_response_snapshot
|
assert response == generous_response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_starcoder_gptq_default_params(
|
async def test_flash_starcoder_gptq_default_params(
|
||||||
flash_starcoder_gptq, generous_response_snapshot
|
flash_starcoder_gptq, generous_response_snapshot
|
||||||
|
@ -40,6 +42,7 @@ async def test_flash_starcoder_gptq_default_params(
|
||||||
assert response == generous_response_snapshot
|
assert response == generous_response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_flash_starcoder_gptq_load(
|
async def test_flash_starcoder_gptq_load(
|
||||||
flash_starcoder_gptq, generate_load, generous_response_snapshot
|
flash_starcoder_gptq, generate_load, generous_response_snapshot
|
||||||
|
|
|
@ -21,6 +21,7 @@ async def non_flash_llama_grammar(non_flash_llama_grammar_handle):
|
||||||
return non_flash_llama_grammar_handle.client
|
return non_flash_llama_grammar_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_non_flash_llama_grammar_json(non_flash_llama_grammar, response_snapshot):
|
async def test_non_flash_llama_grammar_json(non_flash_llama_grammar, response_snapshot):
|
||||||
|
|
|
@ -22,6 +22,7 @@ async def llama_grammar(llama_grammar_handle):
|
||||||
return llama_grammar_handle.client
|
return llama_grammar_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_grammar_response_format_llama_json(llama_grammar, response_snapshot):
|
async def test_grammar_response_format_llama_json(llama_grammar, response_snapshot):
|
||||||
|
|
||||||
|
@ -62,6 +63,7 @@ async def test_grammar_response_format_llama_json(llama_grammar, response_snapsh
|
||||||
assert chat_completion == response_snapshot
|
assert chat_completion == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_grammar_response_format_llama_error_if_tools_not_installed(
|
async def test_grammar_response_format_llama_error_if_tools_not_installed(
|
||||||
llama_grammar,
|
llama_grammar,
|
||||||
|
|
|
@ -45,6 +45,7 @@ async def test_idefics(idefics, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_idefics_two_images(idefics, response_snapshot):
|
async def test_idefics_two_images(idefics, response_snapshot):
|
||||||
|
@ -60,6 +61,7 @@ async def test_idefics_two_images(idefics, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_idefics_load(idefics, generate_load, response_snapshot):
|
async def test_idefics_load(idefics, generate_load, response_snapshot):
|
||||||
chicken = get_chicken()
|
chicken = get_chicken()
|
||||||
|
|
|
@ -26,6 +26,7 @@ async def flash_llava_next(flash_llava_next_handle):
|
||||||
return flash_llava_next_handle.client
|
return flash_llava_next_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llava_next_simple(flash_llava_next, response_snapshot):
|
async def test_flash_llava_next_simple(flash_llava_next, response_snapshot):
|
||||||
|
@ -41,6 +42,7 @@ async def test_flash_llava_next_simple(flash_llava_next, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llava_next_all_params(flash_llava_next, response_snapshot):
|
async def test_flash_llava_next_all_params(flash_llava_next, response_snapshot):
|
||||||
|
@ -64,6 +66,7 @@ async def test_flash_llava_next_all_params(flash_llava_next, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@pytest.mark.private
|
@pytest.mark.private
|
||||||
async def test_flash_llava_next_load(
|
async def test_flash_llava_next_load(
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def fused_kernel_mamba(fused_kernel_mamba_handle):
|
||||||
return fused_kernel_mamba_handle.client
|
return fused_kernel_mamba_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mamba(fused_kernel_mamba, response_snapshot):
|
async def test_mamba(fused_kernel_mamba, response_snapshot):
|
||||||
response = await fused_kernel_mamba.generate(
|
response = await fused_kernel_mamba.generate(
|
||||||
|
@ -24,6 +25,7 @@ async def test_mamba(fused_kernel_mamba, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
|
async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
|
||||||
response = await fused_kernel_mamba.generate(
|
response = await fused_kernel_mamba.generate(
|
||||||
|
@ -50,6 +52,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mamba_load(
|
async def test_mamba_load(
|
||||||
fused_kernel_mamba, generate_load, generous_response_snapshot
|
fused_kernel_mamba, generate_load, generous_response_snapshot
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def mpt_sharded(mpt_sharded_handle):
|
||||||
return mpt_sharded_handle.client
|
return mpt_sharded_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mpt(mpt_sharded, response_snapshot):
|
async def test_mpt(mpt_sharded, response_snapshot):
|
||||||
response = await mpt_sharded.generate(
|
response = await mpt_sharded.generate(
|
||||||
|
@ -29,6 +30,7 @@ async def test_mpt(mpt_sharded, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mpt_load(mpt_sharded, generate_load, response_snapshot):
|
async def test_mpt_load(mpt_sharded, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
responses = await generate_load(
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def mt0_base(mt0_base_handle):
|
||||||
return mt0_base_handle.client
|
return mt0_base_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mt0_base(mt0_base, response_snapshot):
|
async def test_mt0_base(mt0_base, response_snapshot):
|
||||||
response = await mt0_base.generate(
|
response = await mt0_base.generate(
|
||||||
|
@ -27,6 +28,7 @@ async def test_mt0_base(mt0_base, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mt0_base_all_params(mt0_base, response_snapshot):
|
async def test_mt0_base_all_params(mt0_base, response_snapshot):
|
||||||
response = await mt0_base.generate(
|
response = await mt0_base.generate(
|
||||||
|
@ -49,6 +51,7 @@ async def test_mt0_base_all_params(mt0_base, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_mt0_base_load(mt0_base, generate_load, response_snapshot):
|
async def test_mt0_base_load(mt0_base, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
responses = await generate_load(
|
||||||
|
|
|
@ -15,6 +15,7 @@ async def neox(neox_handle):
|
||||||
return neox_handle.client
|
return neox_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_neox(neox, response_snapshot):
|
async def test_neox(neox, response_snapshot):
|
||||||
|
@ -28,6 +29,7 @@ async def test_neox(neox, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_neox_load(neox, generate_load, response_snapshot):
|
async def test_neox_load(neox, generate_load, response_snapshot):
|
||||||
|
|
|
@ -15,6 +15,7 @@ async def neox_sharded(neox_sharded_handle):
|
||||||
return neox_sharded_handle.client
|
return neox_sharded_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_neox(neox_sharded, response_snapshot):
|
async def test_neox(neox_sharded, response_snapshot):
|
||||||
|
@ -28,6 +29,7 @@ async def test_neox(neox_sharded, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.skip
|
@pytest.mark.skip
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_neox_load(neox_sharded, generate_load, response_snapshot):
|
async def test_neox_load(neox_sharded, generate_load, response_snapshot):
|
||||||
|
|
|
@ -13,6 +13,7 @@ async def t5_sharded(t5_sharded_handle):
|
||||||
return t5_sharded_handle.client
|
return t5_sharded_handle.client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_t5_sharded(t5_sharded, response_snapshot):
|
async def test_t5_sharded(t5_sharded, response_snapshot):
|
||||||
response = await t5_sharded.generate(
|
response = await t5_sharded.generate(
|
||||||
|
@ -24,6 +25,7 @@ async def test_t5_sharded(t5_sharded, response_snapshot):
|
||||||
assert response == response_snapshot
|
assert response == response_snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.release
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_t5_sharded_load(t5_sharded, generate_load, response_snapshot):
|
async def test_t5_sharded_load(t5_sharded, generate_load, response_snapshot):
|
||||||
responses = await generate_load(
|
responses = await generate_load(
|
||||||
|
|
Loading…
Reference in New Issue