# .github/workflows/build.yaml
# Build and push the text-generation-inference Docker image, then run integration tests.
name: Build and push docker image to internal registry

# Reusable workflow: callers pick the target hardware and whether the
# release-level integration-test suite should run.
on:
  workflow_call:
    inputs:
      hardware:
        type: string
        description: Hardware
        # options:
        # - cuda
        # - rocm
        # - xpu
        required: true
      release-tests:
        description: "Run release integration tests"
        required: true
        default: false
        type: boolean
jobs:
  # Builds the Docker image for the selected hardware and pushes it to the
  # internal + public registries. Exposes the image name, device flags,
  # cache volume, runner label and tag suffix to downstream jobs.
  build-and-push:
    outputs:
      docker_image: ${{ steps.final.outputs.docker_image }}
      docker_devices: ${{ steps.final.outputs.docker_devices }}
      docker_volume: ${{ steps.final.outputs.docker_volume }}
      runs_on: ${{ steps.final.outputs.runs_on }}
      label: ${{ steps.final.outputs.label }}
    concurrency:
      group: ${{ github.workflow }}-build-and-push-image-${{ inputs.hardware }}-${{ github.head_ref || github.run_id }}
      cancel-in-progress: true
    # TODO see with @Glegendre to get CPU runner here instead
    runs-on: [self-hosted, nvidia-gpu, multi-gpu, 4-a10, ci]
    permissions:
      contents: write
      packages: write
      # This is used to complete the identity challenge
      # with sigstore/fulcio when running outside of PRs.
      id-token: write
      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4.4.1
      # Map the hardware input onto the Dockerfile, tag suffix, device flags
      # and runner label used by the rest of the workflow.
      - name: Construct hardware variables
        shell: bash
        run: |
          case ${{ inputs.hardware }} in
            cuda)
              export dockerfile="Dockerfile"
              export label_extension=""
              export docker_devices=""
              export runs_on="nvidia-gpu"
              ;;
            rocm)
              export dockerfile="Dockerfile_amd"
              export label_extension="-rocm"
              export docker_devices="/dev/kfd,/dev/dri"
              export runs_on="amd-gpu-tgi"
              ;;
            xpu)
              export dockerfile="Dockerfile_intel"
              export label_extension="-intel"
              export docker_devices=""
              export runs_on="ubuntu-latest"
              ;;
          esac
          echo $dockerfile
          echo "Dockerfile=${dockerfile}"
          echo $label_extension
          echo $docker_devices
          echo $runs_on
          echo "DOCKERFILE=${dockerfile}" >> $GITHUB_ENV
          echo "LABEL=${label_extension}" >> $GITHUB_ENV
          echo "DOCKER_DEVICES=${docker_devices}" >> $GITHUB_ENV
          echo "RUNS_ON=${runs_on}" >> $GITHUB_ENV
      - name: Initialize Docker Buildx
        uses: docker/setup-buildx-action@v3
        with:
          install: true
          config-inline: |
            [registry."docker.io"]
              mirrors = ["registry.github-runners.huggingface.tech"]
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Login to Azure Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.AZURE_DOCKER_USERNAME }}
          password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
          registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
      # If pull request
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name == 'pull_request' }}
        id: meta-pr
        uses: docker/metadata-action@v5
        with:
          images: |
            registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference
          tags: |
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
      # If main, release or tag
      - name: Extract metadata (tags, labels) for Docker
        if: ${{ github.event_name != 'pull_request' }}
        id: meta
        # Aligned with the PR-path step above (was pinned at v4.3.0).
        uses: docker/metadata-action@v5
        with:
          flavor: |
            latest=auto
          images: |
            registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference
            ghcr.io/huggingface/text-generation-inference
            db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
          tags: |
            type=semver,pattern={{version}}${{ env.LABEL }}
            type=semver,pattern={{major}}.{{minor}}${{ env.LABEL }}
            type=raw,value=latest${{ env.LABEL }},enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
            type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v4
        with:
          context: .
          file: ${{ env.DOCKERFILE }}
          push: true
          platforms: 'linux/amd64'
          build-args: |
            GIT_SHA=${{ env.GITHUB_SHA }}
            DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}
          tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
          cache-from: type=registry,ref=registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference:cache${{ env.LABEL }},mode=min
          cache-to: type=registry,ref=registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference:cache${{ env.LABEL }},mode=min
      # Publish job outputs consumed by the integration-test jobs below.
      - name: Final
        id: final
        run: |
          echo "docker_image=registry-push.github-runners.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
          echo "docker_devices=${{ env.DOCKER_DEVICES }}" >> "$GITHUB_OUTPUT"
          echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
          echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
          if [[ ${{ inputs.hardware }} == "rocm" ]]
          then
            echo "docker_volume=/data/cache/.cache/huggingface/hub" >> "$GITHUB_OUTPUT"
          else
            echo "docker_volume=/mnt/cache" >> "$GITHUB_OUTPUT"
          fi
prepare_integration_tests:
runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"]
needs: [build-and-push]
2024-06-26 04:43:57 -06:00
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
# Ideally, we would use the image from registry.internal.huggingface.tech but we can not login to the private registry outside of tailscale,
# and even adding a previous job with tailscale login still results in `Docker login for 'registry.internal.huggingface.tech' failed with exit code 1`.
2024-06-17 04:01:17 -06:00
container:
2024-07-02 07:32:53 -06:00
image: ${{ needs.build-and-push.outputs.docker_image }}
2024-06-17 04:01:17 -06:00
options: --shm-size "16gb" --ipc host -v ${{ needs.build-and-push.outputs.docker_volume }}:/data
steps:
2024-06-20 03:28:10 -06:00
- name: Checkout repository
uses: actions/checkout@v4
2024-06-17 04:01:17 -06:00
- name: Clean Hugging Face cache
2024-06-20 03:28:10 -06:00
shell: bash
2024-06-17 04:01:17 -06:00
run: |
if [[ ${{ inputs.hardware }} == "rocm" ]]
then
2024-06-20 03:28:10 -06:00
echo "pwd:"
pwd
echo "ls:"
ls
python3 integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }} --cache-dir /data
2024-06-27 08:51:11 -06:00
# Avoid permissions issues in the next step not run within docker (File was unable to be removed Error: EACCES).
if [[ $PWD == *"text-generation-inference"* ]]; then
2024-06-28 03:49:20 -06:00
rm -rf -- ..?* .[!.]* *
2024-06-27 08:51:11 -06:00
fi
2024-06-17 04:01:17 -06:00
fi
integration_tests:
concurrency:
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
2024-06-21 10:28:04 -06:00
needs: [build-and-push, prepare_integration_tests]
runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"]
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
env:
PYTEST_FLAGS: ${{ (startsWith(github.ref, 'refs/tags/') || github.ref == 'refs/heads/main' || inputs.release-tests == true) && '--release' || '' }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
2024-06-11 05:25:14 -06:00
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4.4.1
2024-06-11 05:25:14 -06:00
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
2024-06-11 05:25:14 -06:00
- name: Install
run: |
make install-integration-tests
2024-06-11 05:25:14 -06:00
- name: Run tests
run: |
export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
2024-06-28 07:10:43 -06:00
export HF_TOKEN=${{ secrets.HF_TOKEN }}
2024-06-11 05:25:14 -06:00
export DOCKER_IMAGE=${{ needs.build-and-push.outputs.docker_image }}
echo "DOCKER_IMAGE:"
echo $DOCKER_IMAGE
2024-06-11 05:25:14 -06:00
export SYSTEM=${{ inputs.hardware }}
echo "SYSTEM:"
echo $SYSTEM
export DOCKER_VOLUME=${{ needs.build-and-push.outputs.docker_volume }}
echo "DOCKER_VOLUME:"
echo $DOCKER_VOLUME
# TunableOp warmup is rather slow, do it only for a few seqlens.
if [[ ${{ inputs.hardware }} == "rocm" ]]
then
PYTORCH_TUNABLEOP_SEQLENS=2,4
fi
2024-06-26 04:08:42 -06:00
pytest -s -vvvvv integration-tests ${PYTEST_FLAGS}