From 2330052aa2084c63cc6846042a3fb612b95db721 Mon Sep 17 00:00:00 2001 From: Felix Marty <9808326+fxmarty@users.noreply.github.com> Date: Wed, 26 Jun 2024 10:43:57 +0000 Subject: [PATCH] debug --- .github/workflows/build.yaml | 17 ++++++++++++++++- integration-tests/clean_cache_and_download.py | 2 ++ integration-tests/conftest.py | 5 +---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9a953939..ad5e1e1d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -168,13 +168,28 @@ jobs: fi - prepare_integration_tests: + login_tailscale: runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"] needs: build-and-push concurrency: group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest' + steps: + - name: Tailscale + uses: huggingface/tailscale-action@main + if: needs.build-and-push.outputs.runs_on != 'amd-gpu-tgi' + with: + authkey: ${{ secrets.TAILSCALE_AUTHKEY }} + + + prepare_integration_tests: + runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"] + needs: [build-and-push, login_tailscale] + concurrency: + group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest' container: image: ${{ needs.build-and-push.outputs.docker_image }} credentials: diff --git a/integration-tests/clean_cache_and_download.py b/integration-tests/clean_cache_and_download.py index 7d843b76..4fea3f0a 100644 --- a/integration-tests/clean_cache_and_download.py +++ b/integration-tests/clean_cache_and_download.py @@ -112,6 +112,8 @@ def cleanup_cache(token: str, cache_dir: str): total_required_cached_size = sum(cached_required_size_per_model.values()) total_other_cached_size = sum(cache_size_per_model.values()) + print("total_required_size", total_required_size) + print("total_required_cached_size", total_required_cached_size) total_non_cached_required_size = total_required_size - total_required_cached_size assert total_non_cached_required_size >= 0 diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index a81de3f0..f78301c6 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -394,7 +394,7 @@ def launcher(event_loop): with tempfile.TemporaryFile("w+") as tmp: # We'll output stdout/stderr to a temporary file. Using a pipe # cause the process to block until stdout is read. - print("call subprocess.Popen, with args", args) + print("subprocess.Popen:", args) with subprocess.Popen( args, stdout=tmp, @@ -426,7 +426,6 @@ def launcher(event_loop): max_batch_prefill_tokens: Optional[int] = None, max_total_tokens: Optional[int] = None, ): - print("call docker launcher") port = random.randint(8000, 10_000) args = ["--model-id", model_id, "--env"] @@ -494,8 +493,6 @@ def launcher(event_loop): docker.types.DeviceRequest(count=gpu_count, capabilities=[["gpu"]]) ] - print("call client.containers.run") - print("container_name", container_name) container = client.containers.run( DOCKER_IMAGE, command=args,