diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index dcfde28a..ad6d9827 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -160,7 +160,7 @@ jobs:
           if [[ ${{ inputs.hardware }} == "rocm" ]]
           then
-            echo "docker_volume=/data/cache/.cache/huggingface/hub"
+            echo "docker_volume=/data/cache/.cache/huggingface/hub" >> "$GITHUB_OUTPUT"
           else
             echo "docker_volume=/mnt/cache" >> "$GITHUB_OUTPUT"
           fi
 
diff --git a/integration-tests/models/test_flash_gemma_gptq.py b/integration-tests/models/test_flash_gemma_gptq.py
index 14a8075d..8dc674b6 100644
--- a/integration-tests/models/test_flash_gemma_gptq.py
+++ b/integration-tests/models/test_flash_gemma_gptq.py
@@ -4,6 +4,7 @@ from testing_utils import require_backend_async, require_backend
 
 # These tests do not pass on ROCm, that does not support head_dim > 128 (2b model is 256).
 
+
 @pytest.fixture(scope="module")
 @require_backend("cuda", "xpu")
 def flash_gemma_gptq_handle(launcher):
diff --git a/integration-tests/models/testing_utils.py b/integration-tests/models/testing_utils.py
index 606a24c0..759e9de7 100644
--- a/integration-tests/models/testing_utils.py
+++ b/integration-tests/models/testing_utils.py
@@ -51,6 +51,7 @@ def is_flaky_async(
 
     return decorator
 
+
 def require_backend(*args):
     def decorator(func):
         @functools.wraps(func)
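
For context on the helper touched in the last hunk: the diff only shows the signature require_backend(*args) and the call site @require_backend("cuda", "xpu"), not the body. The sketch below is one plausible shape for such a decorator, not the repository's actual implementation; in particular, reading the active backend from a SYSTEM environment variable and the skip message are assumptions made for illustration.

# Sketch only -- not the repository's code. Assumes the active backend is
# exposed via an environment variable (hypothetically named "SYSTEM").
import functools
import os

import pytest


def require_backend(*args):
    """Skip the decorated test unless the current backend is one of *args."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*func_args, **func_kwargs):
            backend = os.environ.get("SYSTEM", "cuda")  # hypothetical detection
            if backend not in args:
                pytest.skip(f"Backend is {backend}; test requires one of {args}")
            return func(*func_args, **func_kwargs)

        return wrapper

    return decorator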