# hf_text-generation-inference/.github/workflows/load_test.yaml

# Load-test workflow. Triggered by a weekday cron schedule and by pull requests
# targeting main that modify this workflow file.
name: Nightly load test

on:
  schedule:
    # Minute 0, hour 0, Monday through Friday (GitHub evaluates cron in UTC).
    - cron: "0 0 * * 1-5"

  pull_request:
    # Only pull requests against main ...
    branches:
      - "main"
    # ... that touch this workflow definition.
    paths:
      - ".github/workflows/load_test.yaml"

jobs:
  # Starts the EC2 instance that hosts the self-hosted runner and publishes the
  # runner label and instance id for the jobs that depend on this one.
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    env:
      AWS_REGION: eu-central-1
      EC2_AMI_ID: ami-0ab09c07cfd194259
      EC2_INSTANCE_TYPE: g5.12xlarge
      # Comma-separated subnet ids, forwarded verbatim to the runner action.
      EC2_SUBNET_ID: subnet-988fd9f2,subnet-6f56db13,subnet-6a039326
      EC2_SECURITY_GROUP: sg-072f92ae3082936c6
    outputs:
      # Consumed by load-tests (as its runs-on label) and by stop-runner.
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        # v1 is a retired major of this action; v4 takes the same
        # access-key / secret-key / region inputs used here.
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Start EC2 runner
        id: start-ec2-runner
        # NOTE(review): this third-party action is referenced by the mutable
        # `main` branch while receiving AWS credentials and a PAT; consider
        # pinning it to a reviewed commit SHA.
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ env.EC2_AMI_ID }}
          ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
          subnet-id: ${{ env.EC2_SUBNET_ID }}
          security-group-id: ${{ env.EC2_SECURITY_GROUP }}
          aws-resource-tags: > # optional, requires additional permissions
            [
              {"Key": "Name", "Value": "ec2-tgi-github-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
            ]

  # Runs the k6 load test against a TGI container serving bigcode/starcoder on
  # the runner started by start-runner.
  load-tests:
    concurrency:
      # One group per PR branch (head_ref) or, outside PRs, per run (run_id);
      # a newer run in the same group cancels the one in progress.
      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    needs: start-runner # required to start the main job when the runner is ready
    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
    env:
      # Host directory mounted into the container as /data.
      DOCKER_VOLUME: /cache
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Prepare disks
        # Formats /dev/nvme1n1 as ext4 and mounts it at DOCKER_VOLUME.
        # `mkdir -p` keeps the step from failing if the mount point exists.
        run: |
          sudo mkfs -t ext4 /dev/nvme1n1
          sudo mkdir -p "${DOCKER_VOLUME}"
          sudo mount /dev/nvme1n1 "${DOCKER_VOLUME}"

      - name: Install k6
        # --fail makes curl exit non-zero on an HTTP error instead of piping
        # the error page into tar.
        run: |
          curl --fail --location https://github.com/grafana/k6/releases/download/v0.44.0/k6-v0.44.0-linux-amd64.tar.gz | tar xvz --strip-components 1

      - name: Start starcoder
        env:
          # Supplied through the step environment rather than interpolated
          # into the script text; `docker run -e NAME` forwards the value.
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
        run: |
          docker run --name tgi-starcoder --rm --gpus all -p 3000:80 \
            -v "${DOCKER_VOLUME}:/data" \
            -e HUGGING_FACE_HUB_TOKEN \
            --pull always -d \
            ghcr.io/huggingface/text-generation-inference:latest \
            --model-id bigcode/starcoder --num-shard 2 --max-batch-total-tokens 32768
          sleep 10
          # Poll /health until the server answers.
          # --retry-on-http-error needs an explicit status list. Written bare it
          # swallowed the following --waitretry=1 as its value, so no HTTP status
          # was retried and the wait cap stayed at wget's default of 10s.
          # --waitretry=10 keeps that effective budget, now stated explicitly.
          # NOTE(review): 502/503/504 is an assumed set of "not ready yet"
          # statuses; confirm what /health returns while the model loads.
          wget --timeout 10 --retry-connrefused --retry-on-http-error=502,503,504 --waitretry=10 --tries=240 -O /dev/null http://localhost:3000/health

      - name: Run k6
        run: |
          ./k6 run load_tests/starcoder_load.js

      - name: Stop starcoder
        # Runs even when an earlier step failed; `|| true` tolerates the
        # container not existing.
        if: ${{ always() }}
        run: |
          docker stop tgi-starcoder || true

  # Tears down the EC2 runner using the label and instance id published by
  # start-runner.
  stop-runner:
    name: Stop self-hosted EC2 runner
    needs:
      - start-runner
      - load-tests
    runs-on: ubuntu-latest
    env:
      AWS_REGION: eu-central-1
    if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
    steps:
      - name: Configure AWS credentials
        # v1 is a retired major of this action; v4 takes the same
        # access-key / secret-key / region inputs used here.
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Stop EC2 runner
        # NOTE(review): referenced by the mutable `main` branch while receiving
        # AWS credentials and a PAT; consider pinning to a reviewed commit SHA.
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}