* Only run 1 valid test.

* TRying the tailscale action quickly.

* ?

* bash spaces.

* Remove tailscale.

* More quotes.

* mnt2 ?

* Othername to avoid recursive directories.

* Good old tmate.

* Remove tmate.

* Trying a few things.

* Remove some stuff.

* Sleep ?

* Tmp

* busybox

* Launcher tgi

* Starting hello

* Busybox in python

* No device.

* Removing all variables ?

* A un moment donné.

* Tmp

* Tmp2

* DEvice request, no container name

* No device requests

* Without pytest.

* No pytest.

* from env

* Start with devices

* Attemp #1

* Remove stdin messing

* Only 1 test, no container name

* Raw tgi

* Sending args.

* Show pip freeze.

* Start downloading with token

* Giving HIP devices.

* Mount volume + port forward

* Without pytest.

* No token

* Repeated arguments

* Wrong kwarg.

* On 2 GPUs

* Fallback to single shard CI test.

* Testing

* yaml

* Common cache ?

* Trailing slash ?

* Docker volume split.

* Fix docker volume

* Fixing ?

* ?

* Try no devices ?

* Flash llama on intel CPU ?

* Fix nvidia ?

* Temp deactivate intel, activate nvidia ?
This commit is contained in:
Nicolas Patry 2024-10-09 17:50:49 +02:00 committed by GitHub
parent 9ed0c85fe1
commit 43f39f6894
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 54 additions and 22 deletions

View File

@ -21,9 +21,11 @@ jobs:
build-and-push:
outputs:
docker_image: ${{ steps.final.outputs.docker_image }}
docker_volume: ${{ steps.final.outputs.docker_volume }}
docker_devices: ${{ steps.final.outputs.docker_devices }}
runs_on: ${{ steps.final.outputs.runs_on }}
label: ${{ steps.final.outputs.label }}
extra_pytest: ${{ steps.final.outputs.extra_pytest }}
concurrency:
group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
@ -44,32 +46,39 @@ jobs:
cuda)
export dockerfile="Dockerfile"
export label_extension=""
export docker_volume="/mnt/cache"
export docker_devices=""
export runs_on="aws-g6-12xl-plus-priv-cache"
export platform=""
export extra_pytest=""
;;
rocm)
export dockerfile="Dockerfile_amd"
export label_extension="-rocm"
export docker_devices="/dev/kfd,/dev/dri"
# TODO Re-enable when they pass.
# export runs_on="amd-gpu-tgi"
export runs_on="ubuntu-latest"
export docker_volume="/mnt"
export runs_on="amd-gpu-runners"
export platform=""
export extra_pytest="-k test_flash_gemma_gptq_load"
;;
intel-xpu)
export dockerfile="Dockerfile_intel"
export label_extension="-intel-xpu"
export docker_devices=""
export docker_volume="/mnt/cache"
export runs_on="ubuntu-latest"
export platform="xpu"
export extra_pytest=""
;;
intel-cpu)
export dockerfile="Dockerfile_intel"
export label_extension="-intel-cpu"
export docker_devices=""
export docker_devices="none"
export docker_volume="/mnt/cache"
export runs_on="ubuntu-latest"
# export runs_on="aws-highmemory-32-plus-priv"
export platform="cpu"
export extra_pytest="-k test_flash_llama_load"
;;
esac
echo $dockerfile
@ -81,8 +90,10 @@ jobs:
echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
echo "LABEL=${label_extension}" >> $GITHUB_ENV
echo "PLATFORM=${platform}" >> $GITHUB_ENV
echo "DOCKER_VOLUME=${docker_volume}" >> $GITHUB_ENV
echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
- name: Initialize Docker Buildx
uses: docker/setup-buildx-action@v3
@ -157,16 +168,18 @@ jobs:
run: |
echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT"
echo "docker_volume=${{ env.DOCKER_VOLUME }}" >> "$GITHUB_OUTPUT"
echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
echo "extra_pytest=${{ env.EXTRA_PYTEST }}" >> "$GITHUB_OUTPUT"
integration_tests:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
needs: build-and-push
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
runs-on:
group: ${{ needs.build-and-push.outputs.runs_on }}
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
env:
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
steps:
@ -177,15 +190,16 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.11"
- name: Install
run: |
make install-integration-tests
- name: Run tests
run: |
export DOCKER_VOLUME=/mnt/cache
export DOCKER_VOLUME=${{ needs.build-and-push.outputs.docker_volume }}
export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
export EXTRA_PYTEST="${{ needs.build-and-push.outputs.extra_pytest }}"
export HF_TOKEN=${{ secrets.HF_TOKEN }}
echo $DOCKER_IMAGE
pytest -s -vv integration-tests ${PYTEST_FLAGS}
pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}

View File

@ -492,6 +492,7 @@ def launcher(event_loop):
try:
container = client.containers.get(container_name)
container.stop()
container.remove()
container.wait()
except NotFound:
pass
@ -514,13 +515,28 @@ def launcher(event_loop):
volumes = [f"{DOCKER_VOLUME}:/data"]
if DOCKER_DEVICES:
devices = DOCKER_DEVICES.split(",")
if DOCKER_DEVICES.lower() == "none":
devices = []
else:
devices = DOCKER_DEVICES.strip().split(",")
visible = os.getenv("ROCR_VISIBLE_DEVICES")
if visible:
env["ROCR_VISIBLE_DEVICES"] = visible
device_requests = []
if not devices:
devices = None
elif devices == ["nvidia.com/gpu=all"]:
devices = None
device_requests = [
docker.types.DeviceRequest(
driver="cdi",
# count=gpu_count,
device_ids=[f"nvidia.com/gpu={i}"],
)
for i in range(gpu_count)
]
else:
devices = []
devices = None
device_requests = [
docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
]
@ -540,6 +556,7 @@ def launcher(event_loop):
shm_size="1G",
)
try:
yield ContainerLauncherHandle(client, container.name, port)
if not use_flash_attention:
@ -554,6 +571,7 @@ def launcher(event_loop):
container_output = container.logs().decode("utf-8")
print(container_output, file=sys.stderr)
finally:
container.remove()
if DOCKER_IMAGE is not None: