diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index ce1cdc33..d0aaea27 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -21,9 +21,11 @@ jobs:
   build-and-push:
     outputs:
       docker_image: ${{ steps.final.outputs.docker_image }}
+      docker_volume: ${{ steps.final.outputs.docker_volume }}
       docker_devices: ${{ steps.final.outputs.docker_devices }}
       runs_on: ${{ steps.final.outputs.runs_on }}
       label: ${{ steps.final.outputs.label }}
+      extra_pytest: ${{ steps.final.outputs.extra_pytest }}
     concurrency:
       group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
       cancel-in-progress: true
@@ -44,32 +46,39 @@ jobs:
           cuda)
             export dockerfile="Dockerfile"
             export label_extension=""
+            export docker_volume="/mnt/cache"
             export docker_devices=""
             export runs_on="aws-g6-12xl-plus-priv-cache"
             export platform=""
+            export extra_pytest=""
             ;;
           rocm)
             export dockerfile="Dockerfile_amd"
             export label_extension="-rocm"
             export docker_devices="/dev/kfd,/dev/dri"
-            # TODO Re-enable when they pass.
-            # export runs_on="amd-gpu-tgi"
-            export runs_on="ubuntu-latest"
+            export docker_volume="/mnt"
+            export runs_on="amd-gpu-runners"
             export platform=""
+            export extra_pytest="-k test_flash_gemma_gptq_load"
             ;;
           intel-xpu)
             export dockerfile="Dockerfile_intel"
             export label_extension="-intel-xpu"
             export docker_devices=""
+            export docker_volume="/mnt/cache"
             export runs_on="ubuntu-latest"
             export platform="xpu"
+            export extra_pytest=""
             ;;
           intel-cpu)
             export dockerfile="Dockerfile_intel"
             export label_extension="-intel-cpu"
-            export docker_devices=""
+            export docker_devices="none"
+            export docker_volume="/mnt/cache"
             export runs_on="ubuntu-latest"
+            # export runs_on="aws-highmemory-32-plus-priv"
             export platform="cpu"
+            export extra_pytest="-k test_flash_llama_load"
             ;;
           esac
           echo $dockerfile
@@ -81,8 +90,10 @@ jobs:
           echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
           echo "LABEL=${label_extension}" >> $GITHUB_ENV
           echo "PLATFORM=${platform}" >> $GITHUB_ENV
+          echo "DOCKER_VOLUME=${docker_volume}" >> $GITHUB_ENV
           echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
           echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
+          echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
           echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
       - name: Initialize Docker Buildx
         uses: docker/setup-buildx-action@v3
@@ -157,16 +168,18 @@ jobs:
         run: |
           echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
           echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT"
+          echo "docker_volume=${{ env.DOCKER_VOLUME }}" >> "$GITHUB_OUTPUT"
           echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
           echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
+          echo "extra_pytest=${{ env.EXTRA_PYTEST }}" >> "$GITHUB_OUTPUT"
   integration_tests:
     concurrency:
       group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
       cancel-in-progress: true
     needs: build-and-push
+    if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
     runs-on:
       group: ${{ needs.build-and-push.outputs.runs_on }}
-    if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
     env:
       PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
     steps:
@@ -177,15 +190,16 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.10"
+          python-version: "3.11"
       - name: Install
         run: |
           make install-integration-tests
       - name: Run tests
         run: |
-          export DOCKER_VOLUME=/mnt/cache
+          export DOCKER_VOLUME=${{ needs.build-and-push.outputs.docker_volume }}
           export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
           export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
+          export EXTRA_PYTEST="${{ needs.build-and-push.outputs.extra_pytest }}"
           export HF_TOKEN=${{ secrets.HF_TOKEN }}
           echo $DOCKER_IMAGE
-          pytest -s -vv integration-tests ${PYTEST_FLAGS}
+          pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 4c8c929f..dbe69244 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -492,6 +492,7 @@ def launcher(event_loop):
         try:
             container = client.containers.get(container_name)
             container.stop()
+            container.remove()
             container.wait()
         except NotFound:
             pass
@@ -514,13 +515,28 @@ def launcher(event_loop):
         volumes = [f"{DOCKER_VOLUME}:/data"]
 
         if DOCKER_DEVICES:
-            devices = DOCKER_DEVICES.split(",")
+            if DOCKER_DEVICES.lower() == "none":
+                devices = []
+            else:
+                devices = DOCKER_DEVICES.strip().split(",")
             visible = os.getenv("ROCR_VISIBLE_DEVICES")
             if visible:
                 env["ROCR_VISIBLE_DEVICES"] = visible
             device_requests = []
+            if not devices:
+                devices = None
+            elif devices == ["nvidia.com/gpu=all"]:
+                devices = None
+                device_requests = [
+                    docker.types.DeviceRequest(
+                        driver="cdi",
+                        # count=gpu_count,
+                        device_ids=[f"nvidia.com/gpu={i}"],
+                    )
+                    for i in range(gpu_count)
+                ]
         else:
-            devices = []
+            devices = None
             device_requests = [
                 docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
             ]
@@ -540,21 +556,23 @@ def launcher(event_loop):
             shm_size="1G",
         )
 
-        yield ContainerLauncherHandle(client, container.name, port)
-
-        if not use_flash_attention:
-            del env["USE_FLASH_ATTENTION"]
-
-        try:
-            container.stop()
-            container.wait()
-        except NotFound:
-            pass
+        try:
+            yield ContainerLauncherHandle(client, container.name, port)
 
-        container_output = container.logs().decode("utf-8")
-        print(container_output, file=sys.stderr)
+            if not use_flash_attention:
+                del env["USE_FLASH_ATTENTION"]
 
-        container.remove()
+            try:
+                container.stop()
+                container.wait()
+            except NotFound:
+                pass
+
+            container_output = container.logs().decode("utf-8")
+            print(container_output, file=sys.stderr)
+
+        finally:
+            container.remove()
 
     if DOCKER_IMAGE is not None:
         return docker_launcher