AMD CI (#2589)
* Only run 1 valid test. * Trying the tailscale action quickly. * ? * bash spaces. * Remove tailscale. * More quotes. * mnt2 ? * Other name to avoid recursive directories. * Good old tmate. * Remove tmate. * Trying a few things. * Remove some stuff. * Sleep ? * Tmp * busybox * Launcher tgi * Starting hello * Busybox in python * No device. * Removing all variables ? * A un moment donné. * Tmp * Tmp2 * Device request, no container name * No device requests * Without pytest. * No pytest. * from env * Start with devices * Attempt #1 * Remove stdin messing * Only 1 test, no container name * Raw tgi * Sending args. * Show pip freeze. * Start downloading with token * Giving HIP devices. * Mount volume + port forward * Without pytest. * No token * Repeated arguments * Wrong kwarg. * On 2 GPUs * Fallback to single shard CI test. * Testing * yaml * Common cache ? * Trailing slash ? * Docker volume split. * Fix docker volume * Fixing ? * ? * Try no devices ? * Flash llama on intel CPU ? * Fix nvidia ? * Temp deactivate intel, activate nvidia ?
This commit is contained in:
parent
9ed0c85fe1
commit
43f39f6894
|
@ -21,9 +21,11 @@ jobs:
|
|||
build-and-push:
|
||||
outputs:
|
||||
docker_image: ${{ steps.final.outputs.docker_image }}
|
||||
docker_volume: ${{ steps.final.outputs.docker_volume }}
|
||||
docker_devices: ${{ steps.final.outputs.docker_devices }}
|
||||
runs_on: ${{ steps.final.outputs.runs_on }}
|
||||
label: ${{ steps.final.outputs.label }}
|
||||
extra_pytest: ${{ steps.final.outputs.extra_pytest }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
@ -44,32 +46,39 @@ jobs:
|
|||
cuda)
|
||||
export dockerfile="Dockerfile"
|
||||
export label_extension=""
|
||||
export docker_volume="/mnt/cache"
|
||||
export docker_devices=""
|
||||
export runs_on="aws-g6-12xl-plus-priv-cache"
|
||||
export platform=""
|
||||
export extra_pytest=""
|
||||
;;
|
||||
rocm)
|
||||
export dockerfile="Dockerfile_amd"
|
||||
export label_extension="-rocm"
|
||||
export docker_devices="/dev/kfd,/dev/dri"
|
||||
# TODO Re-enable when they pass.
|
||||
# export runs_on="amd-gpu-tgi"
|
||||
export runs_on="ubuntu-latest"
|
||||
export docker_volume="/mnt"
|
||||
export runs_on="amd-gpu-runners"
|
||||
export platform=""
|
||||
export extra_pytest="-k test_flash_gemma_gptq_load"
|
||||
;;
|
||||
intel-xpu)
|
||||
export dockerfile="Dockerfile_intel"
|
||||
export label_extension="-intel-xpu"
|
||||
export docker_devices=""
|
||||
export docker_volume="/mnt/cache"
|
||||
export runs_on="ubuntu-latest"
|
||||
export platform="xpu"
|
||||
export extra_pytest=""
|
||||
;;
|
||||
intel-cpu)
|
||||
export dockerfile="Dockerfile_intel"
|
||||
export label_extension="-intel-cpu"
|
||||
export docker_devices=""
|
||||
export docker_devices="none"
|
||||
export docker_volume="/mnt/cache"
|
||||
export runs_on="ubuntu-latest"
|
||||
# export runs_on="aws-highmemory-32-plus-priv"
|
||||
export platform="cpu"
|
||||
export extra_pytest="-k test_flash_llama_load"
|
||||
;;
|
||||
esac
|
||||
echo $dockerfile
|
||||
|
@ -81,8 +90,10 @@ jobs:
|
|||
echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
|
||||
echo "LABEL=${label_extension}" >> $GITHUB_ENV
|
||||
echo "PLATFORM=${platform}" >> $GITHUB_ENV
|
||||
echo "DOCKER_VOLUME=${docker_volume}" >> $GITHUB_ENV
|
||||
echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
|
||||
echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
|
||||
echo "EXTRA_PYTEST=${extra_pytest}" >> $GITHUB_ENV
|
||||
echo REGISTRY_MIRROR=$REGISTRY_MIRROR >> $GITHUB_ENV
|
||||
- name: Initialize Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
@ -157,16 +168,18 @@ jobs:
|
|||
run: |
|
||||
echo "docker_image=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT}}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
|
||||
echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT"
|
||||
echo "docker_volume=${{ env.DOCKER_VOLUME }}" >> "$GITHUB_OUTPUT"
|
||||
echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
|
||||
echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
|
||||
echo "extra_pytest=${{ env.EXTRA_PYTEST }}" >> "$GITHUB_OUTPUT"
|
||||
integration_tests:
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
needs: build-and-push
|
||||
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
||||
runs-on:
|
||||
group: ${{ needs.build-and-push.outputs.runs_on }}
|
||||
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
||||
env:
|
||||
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '--release' }}
|
||||
steps:
|
||||
|
@ -177,15 +190,16 @@ jobs:
|
|||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.10"
|
||||
python-version: "3.11"
|
||||
- name: Install
|
||||
run: |
|
||||
make install-integration-tests
|
||||
- name: Run tests
|
||||
run: |
|
||||
export DOCKER_VOLUME=/mnt/cache
|
||||
export DOCKER_VOLUME=${{ needs.build-and-push.outputs.docker_volume }}
|
||||
export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
|
||||
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
|
||||
export EXTRA_PYTEST="${{ needs.build-and-push.outputs.extra_pytest }}"
|
||||
export HF_TOKEN=${{ secrets.HF_TOKEN }}
|
||||
echo $DOCKER_IMAGE
|
||||
pytest -s -vv integration-tests ${PYTEST_FLAGS}
|
||||
pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST}
|
||||
|
|
|
@ -492,6 +492,7 @@ def launcher(event_loop):
|
|||
try:
|
||||
container = client.containers.get(container_name)
|
||||
container.stop()
|
||||
container.remove()
|
||||
container.wait()
|
||||
except NotFound:
|
||||
pass
|
||||
|
@ -514,13 +515,28 @@ def launcher(event_loop):
|
|||
volumes = [f"{DOCKER_VOLUME}:/data"]
|
||||
|
||||
if DOCKER_DEVICES:
|
||||
devices = DOCKER_DEVICES.split(",")
|
||||
if DOCKER_DEVICES.lower() == "none":
|
||||
devices = []
|
||||
else:
|
||||
devices = DOCKER_DEVICES.strip().split(",")
|
||||
visible = os.getenv("ROCR_VISIBLE_DEVICES")
|
||||
if visible:
|
||||
env["ROCR_VISIBLE_DEVICES"] = visible
|
||||
device_requests = []
|
||||
if not devices:
|
||||
devices = None
|
||||
elif devices == ["nvidia.com/gpu=all"]:
|
||||
devices = None
|
||||
device_requests = [
|
||||
docker.types.DeviceRequest(
|
||||
driver="cdi",
|
||||
# count=gpu_count,
|
||||
device_ids=[f"nvidia.com/gpu={i}"],
|
||||
)
|
||||
for i in range(gpu_count)
|
||||
]
|
||||
else:
|
||||
devices = []
|
||||
devices = None
|
||||
device_requests = [
|
||||
docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]])
|
||||
]
|
||||
|
@ -540,21 +556,23 @@ def launcher(event_loop):
|
|||
shm_size="1G",
|
||||
)
|
||||
|
||||
yield ContainerLauncherHandle(client, container.name, port)
|
||||
|
||||
if not use_flash_attention:
|
||||
del env["USE_FLASH_ATTENTION"]
|
||||
|
||||
try:
|
||||
container.stop()
|
||||
container.wait()
|
||||
except NotFound:
|
||||
pass
|
||||
yield ContainerLauncherHandle(client, container.name, port)
|
||||
|
||||
container_output = container.logs().decode("utf-8")
|
||||
print(container_output, file=sys.stderr)
|
||||
if not use_flash_attention:
|
||||
del env["USE_FLASH_ATTENTION"]
|
||||
|
||||
container.remove()
|
||||
try:
|
||||
container.stop()
|
||||
container.wait()
|
||||
except NotFound:
|
||||
pass
|
||||
|
||||
container_output = container.logs().decode("utf-8")
|
||||
print(container_output, file=sys.stderr)
|
||||
|
||||
finally:
|
||||
container.remove()
|
||||
|
||||
if DOCKER_IMAGE is not None:
|
||||
return docker_launcher
|
||||
|
|
Loading…
Reference in New Issue