From 66b277321d738ea6f0703b485ecefb61fb3f5441 Mon Sep 17 00:00:00 2001 From: OlivierDehaene Date: Mon, 15 May 2023 15:53:08 +0200 Subject: [PATCH] feat(ci): custom gpu runners (#328) --- .github/workflows/build.yaml | 100 +++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 45 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 289e4f67..0a99fb52 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -25,8 +25,44 @@ concurrency: cancel-in-progress: true jobs: + start-runner: + name: Start self-hosted EC2 runner + runs-on: ubuntu-latest + env: + AWS_REGION: us-east-1 + EC2_AMI_ID: ami-03cfed9ea28f4b002 + EC2_INSTANCE_TYPE: g5.12xlarge + EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc + EC2_SECURITY_GROUP: sg-04d472c808f365022 + outputs: + label: ${{ steps.start-ec2-runner.outputs.label }} + ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }} + steps: + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + - name: Start EC2 runner + id: start-ec2-runner + uses: philschmid/philschmid-ec2-github-runner@main + with: + mode: start + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + ec2-image-id: ${{ env.EC2_AMI_ID }} + ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }} + subnet-id: ${{ env.EC2_SUBNET_ID }} + security-group-id: ${{ env.EC2_SECURITY_GROUP }} + aws-resource-tags: > # optional, requires additional permissions + [ + {"Key": "Name", "Value": "ec2-tgi-github-runner"}, + {"Key": "GitHubRepository", "Value": "${{ github.repository }}"} + ] + build-and-push-image: - runs-on: large-runner + needs: start-runner # required to start the main job when the runner is ready + runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner permissions: contents: write packages: write @@ -49,7 +85,7 @@ jobs: with: cosign-release: 'v1.13.1' - name: Tailscale - uses: tailscale/github-action@v1 + uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966 with: authkey: ${{ secrets.TAILSCALE_AUTHKEY }} - name: Login to GitHub Container Registry @@ -136,52 +172,26 @@ jobs: with: sarif_file: 'trivy-results.sarif' - build-and-push-sagemaker-image: + stop-runner: + name: Stop self-hosted EC2 runner needs: + - start-runner - build-and-push-image runs-on: ubuntu-latest + env: + AWS_REGION: us-east-1 + if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs steps: - - name: Checkout repository - uses: actions/checkout@v3 - - name: Initialize Docker Buildx - uses: docker/setup-buildx-action@v2.0.0 + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v1 with: - install: true - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4.4.1 - - name: Tailscale - uses: tailscale/github-action@v1 + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ env.AWS_REGION }} + - name: Stop EC2 runner + uses: philschmid/philschmid-ec2-github-runner@main with: - authkey: ${{ secrets.TAILSCALE_AUTHKEY }} - - name: Login to internal Container Registry - uses: docker/login-action@v2.1.0 - with: - username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }} - password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }} - registry: registry.internal.huggingface.tech - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v4.3.0 - with: - flavor: | - latest=auto - images: | - registry.internal.huggingface.tech/api-inference/community/text-generation-inference/sagemaker - tags: | - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} - type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }} - - name: Build and push Docker image - uses: docker/build-push-action@v2 - with: - context: . - file: Dockerfile - push: ${{ github.event_name != 'pull_request' }} - platforms: 'linux/amd64' - build-args: | - GIT_SHA={{ env.GITHUB_SHA }} - target: sagemaker - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max \ No newline at end of file + mode: stop + github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }} + label: ${{ needs.start-runner.outputs.label }} + ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }} \ No newline at end of file