From a83772c87bce3b588c8d4b20d5ff18db03b96c77 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 13 Feb 2024 17:31:39 +0100 Subject: [PATCH] Self hosted for nvidia too. --- .github/workflows/build.yaml | 126 +++++++++++++++++------------------ 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index f5ac46e6..447ff0bd 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -22,47 +22,47 @@ on: - 'main' jobs: - start-runner: - name: Start self-hosted EC2 runner - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - EC2_AMI_ID: ami-03cfed9ea28f4b002 - EC2_INSTANCE_TYPE: g5.12xlarge - EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc - EC2_SECURITY_GROUP: sg-030175c435ac141d6 - outputs: - label: ${{ steps.start-ec2-runner.outputs.label }} - ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Start EC2 runner - id: start-ec2-runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: start - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - ec2-image-id: ${{ env.EC2_AMI_ID }} - ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} - subnet-id: ${{ env.EC2_SUBNET_ID }} - security-group-id: ${{ env.EC2_SECURITY_GROUP }} - aws-resource-tags: > # optional, requires additional permissions - [ - {"Key": "Name", "Value": "ec2-tgi-github-runner"}, - {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} - ] + # start-runner: + # name: Start self-hosted EC2 runner + # runs-on: ubuntu-latest + # env: + # AWS_REGION: us-east-1 + # EC2_AMI_ID: ami-03cfed9ea28f4b002 + # EC2_INSTANCE_TYPE: g5.12xlarge + # EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc + # EC2_SECURITY_GROUP: sg-030175c435ac141d6 + # outputs: + # label: ${{ steps.start-ec2-runner.outputs.label }} + # ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} + # steps: + # - name: Configure AWS credentials + # uses: aws-actions/configure-aws-credentials@v1 + # with: + # aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + # aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + # aws-region: ${{ env.AWS_REGION }} + # - name: Start EC2 runner + # id: start-ec2-runner + # uses: philschmid/philschmid-ec2-github-runner@main + # with: + # mode: start + # github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + # ec2-image-id: ${{ env.EC2_AMI_ID }} + # ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} + # subnet-id: ${{ env.EC2_SUBNET_ID }} + # security-group-id: ${{ env.EC2_SECURITY_GROUP }} + # aws-resource-tags: > # optional, requires additional permissions + # [ + # {"Key": "Name", "Value": "ec2-tgi-github-runner"}, + # {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} + # ] build-and-push-image: concurrency: group: ${{ github.workflow }}-build-and-push-image-${{ github.head_ref || github.run_id }} cancel-in-progress: true - needs: start-runner # required to start the main job when the runner is ready - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + # needs: start-runner # required to start the main job when the runner is ready + runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci] permissions: contents: write packages: write @@ -151,9 +151,9 @@ jobs: group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true needs: - - start-runner + # - start-runner - build-and-push-image # Wait for the docker image to be built - runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner + runs-on: [multi-gpu, nvidia-gpu, 4-a10, ci] env: DOCKER_VOLUME: /cache steps: @@ -274,28 +274,28 @@ jobs: cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min - stop-runner: - name: Stop self-hosted EC2 runner - needs: - - start-runner - - build-and-push-image - # - build-and-push-image-rocm - - integration-tests - runs-on: ubuntu-latest - env: - AWS_REGION: us-east-1 - if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs - steps: - - name: Configure AWS credentials - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} - aws-region: ${{ env.AWS_REGION }} - - name: Stop EC2 runner - uses: philschmid/philschmid-ec2-github-runner@main - with: - mode: stop - github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} - label: ${{ needs.start-runner.outputs.label }} - ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} + # stop-runner: + # name: Stop self-hosted EC2 runner + # needs: + # - start-runner + # - build-and-push-image + # # - build-and-push-image-rocm + # - integration-tests + # runs-on: ubuntu-latest + # env: + # AWS_REGION: us-east-1 + # if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs + # steps: + # - name: Configure AWS credentials + # uses: aws-actions/configure-aws-credentials@v1 + # with: + # aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + # aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + # aws-region: ${{ env.AWS_REGION }} + # - name: Stop EC2 runner + # uses: philschmid/philschmid-ec2-github-runner@main + # with: + # mode: stop + # github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + # label: ${{ needs.start-runner.outputs.label }} + # ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}