Intel ci (#2630)

* Intel CI ?
* Let's try non sharded gemma.
* Snapshot rename
* Apparently container can be gone already.
2024-10-10 16:51:57 +02:00 · 2024-10-10 16:51:57 +02:00 · 3dbdf63ec5
parent d912f0bf55
commit 3dbdf63ec5
6 changed files with 9 additions and 6 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -75,10 +75,10 @@ jobs:
                export label_extension="-intel-cpu"
                export docker_devices="none"
                export docker_volume="/mnt/cache"
-                export runs_on="ubuntu-latest"
+                # export runs_on="ubuntu-latest"
-                # export runs_on="aws-highmemory-32-plus-priv"
+                export runs_on="aws-highmemory-32-plus-priv"
                export platform="cpu"
-                export extra_pytest="-k test_flash_llama_load"
+                export extra_pytest="-k test_flash_gemma_simple"
                ;;
          esac
          echo $dockerfile
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -572,7 +572,10 @@ def launcher(event_loop):
            print(container_output, file=sys.stderr)
        finally:
-            container.remove()
+            try:
+                container.remove()
+            except Exception:
+                pass
    if DOCKER_IMAGE is not None:
        return docker_launcher
--- a/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_simple.json
+++ b/integration-tests/models/snapshots/test_flash_gemma/test_flash_gemma_simple.json
--- a/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_simple.json
+++ b/integration-tests/models/snapshots/test_flash_llama/test_flash_llama_simple.json
--- a/integration-tests/models/test_flash_gemma.py
+++ b/integration-tests/models/test_flash_gemma.py
@@ -16,7 +16,7 @@ async def flash_gemma(flash_gemma_handle):
@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
-async def test_flash_gemma(flash_gemma, response_snapshot):
+async def test_flash_gemma_simple(flash_gemma, response_snapshot):
    response = await flash_gemma.generate(
        "Test request", max_new_tokens=10, decoder_input_details=True
    )
--- a/integration-tests/models/test_flash_llama.py
+++ b/integration-tests/models/test_flash_llama.py
@@ -15,7 +15,7 @@ async def flash_llama(flash_llama_handle):
@pytest.mark.asyncio
@pytest.mark.private
-async def test_flash_llama(flash_llama, response_snapshot):
+async def test_flash_llama_simple(flash_llama, response_snapshot):
    response = await flash_llama.generate(
        "Test request", max_new_tokens=10, decoder_input_details=True
    )