name: Build and push docker image to internal registry

on:
  workflow_dispatch:
  push:
    branches:
      - 'main'
    tags:
      - 'v*'
  pull_request:
    # Only rebuild when files that affect the docker image change
    paths:
      - ".github/workflows/build.yaml"
      - "integration-tests/**"
      - "server/**"
      - "proto/**"
      - "router/**"
      - "launcher/**"
      - "Cargo.lock"
      - "rust-toolchain.toml"
      - "Dockerfile"
    branches:
      - 'main'
jobs:
  start-runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    env:
      AWS_REGION: us-east-1
      EC2_AMI_ID: ami-0789b6925c11b1fb2
      EC2_INSTANCE_TYPE: g5.12xlarge
      # Comma-separated list; presumably tried in order to find capacity — TODO confirm runner action semantics
      EC2_SUBNET_ID: subnet-931b34f5,subnet-ecb993cd,subnet-943dc2d8,subnet-45371f1a,subnet-ee93e0df,subnet-fddc3dfc
      EC2_SECURITY_GROUP: sg-030175c435ac141d6
    outputs:
      # Consumed by the other jobs via needs.start-runner.outputs.*
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ env.AWS_REGION }}
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: philschmid/philschmid-ec2-github-runner@main
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ env.EC2_AMI_ID }}
          ec2-instance-type: ${{ env.EC2_INSTANCE_TYPE }}
          subnet-id: ${{ env.EC2_SUBNET_ID }}
          security-group-id: ${{ env.EC2_SECURITY_GROUP }}
          aws-resource-tags: > # optional, requires additional permissions
            [
              {"Key": "Name", "Value": "ec2-tgi-github-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"}
            ]
2023-01-31 12:14:05 -07:00
|
|
|
build-and-push-image:
|
2023-05-15 15:36:30 -06:00
|
|
|
concurrency:
|
2023-11-27 06:08:12 -07:00
|
|
|
group: ${{ github.workflow }}-build-and-push-image-${{ github.head_ref || github.run_id }}
|
2023-05-15 15:36:30 -06:00
|
|
|
cancel-in-progress: true
|
2023-05-15 07:53:08 -06:00
|
|
|
needs: start-runner # required to start the main job when the runner is ready
|
|
|
|
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
|
2023-04-13 07:26:34 -06:00
|
|
|
permissions:
|
2023-04-13 08:23:47 -06:00
|
|
|
contents: write
|
2023-04-13 07:26:34 -06:00
|
|
|
packages: write
|
|
|
|
# This is used to complete the identity challenge
|
|
|
|
# with sigstore/fulcio when running outside of PRs.
|
|
|
|
id-token: write
|
2023-04-13 08:32:37 -06:00
|
|
|
security-events: write
|
2023-01-31 12:14:05 -07:00
|
|
|
steps:
|
2023-04-13 07:26:34 -06:00
|
|
|
- name: Checkout repository
|
|
|
|
uses: actions/checkout@v3
|
2023-03-03 07:07:27 -07:00
|
|
|
- name: Initialize Docker Buildx
|
|
|
|
uses: docker/setup-buildx-action@v2.0.0
|
|
|
|
with:
|
|
|
|
install: true
|
2023-04-13 07:26:34 -06:00
|
|
|
- name: Inject slug/short variables
|
|
|
|
uses: rlespinasse/github-slug-action@v4.4.1
|
2023-01-31 12:14:05 -07:00
|
|
|
- name: Tailscale
|
2023-05-15 07:53:08 -06:00
|
|
|
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
|
2023-01-31 12:14:05 -07:00
|
|
|
with:
|
|
|
|
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
|
|
|
|
- name: Login to GitHub Container Registry
|
2023-04-13 07:26:34 -06:00
|
|
|
if: github.event_name != 'pull_request'
|
2023-01-31 12:14:05 -07:00
|
|
|
uses: docker/login-action@v2
|
|
|
|
with:
|
|
|
|
registry: ghcr.io
|
|
|
|
username: ${{ github.actor }}
|
|
|
|
password: ${{ secrets.GITHUB_TOKEN }}
|
|
|
|
- name: Login to internal Container Registry
|
|
|
|
uses: docker/login-action@v2.1.0
|
|
|
|
with:
|
|
|
|
username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
|
|
|
|
password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
|
|
|
|
registry: registry.internal.huggingface.tech
|
2023-02-06 06:33:56 -07:00
|
|
|
- name: Login to Azure Container Registry
|
2023-04-20 03:07:40 -06:00
|
|
|
if: github.event_name != 'pull_request'
|
2023-02-06 06:33:56 -07:00
|
|
|
uses: docker/login-action@v2.1.0
|
|
|
|
with:
|
|
|
|
username: ${{ secrets.AZURE_DOCKER_USERNAME }}
|
|
|
|
password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
|
|
|
|
registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
|
2023-05-15 15:36:30 -06:00
|
|
|
# If pull request
|
2023-01-31 12:14:05 -07:00
|
|
|
- name: Extract metadata (tags, labels) for Docker
|
2023-05-15 15:36:30 -06:00
|
|
|
if: ${{ github.event_name == 'pull_request' }}
|
|
|
|
id: meta-pr
|
|
|
|
uses: docker/metadata-action@v4.3.0
|
|
|
|
with:
|
|
|
|
images: |
|
|
|
|
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
|
|
|
|
tags: |
|
|
|
|
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
|
|
|
|
# If main, release or tag
|
|
|
|
- name: Extract metadata (tags, labels) for Docker
|
|
|
|
if: ${{ github.event_name != 'pull_request' }}
|
2023-01-31 12:14:05 -07:00
|
|
|
id: meta
|
|
|
|
uses: docker/metadata-action@v4.3.0
|
|
|
|
with:
|
|
|
|
flavor: |
|
|
|
|
latest=auto
|
|
|
|
images: |
|
|
|
|
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
|
2023-04-16 16:26:47 -06:00
|
|
|
ghcr.io/huggingface/text-generation-inference
|
2023-02-06 06:33:56 -07:00
|
|
|
db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
|
2023-01-31 12:14:05 -07:00
|
|
|
tags: |
|
2023-02-03 04:43:37 -07:00
|
|
|
type=semver,pattern={{version}}
|
|
|
|
type=semver,pattern={{major}}.{{minor}}
|
2023-01-31 12:14:05 -07:00
|
|
|
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
|
|
|
|
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
|
|
|
|
- name: Build and push Docker image
|
2023-04-13 07:43:17 -06:00
|
|
|
id: build-and-push
|
2023-04-13 07:26:34 -06:00
|
|
|
uses: docker/build-push-action@v4
|
2023-01-31 12:14:05 -07:00
|
|
|
with:
|
|
|
|
context: .
|
|
|
|
file: Dockerfile
|
2023-05-15 15:36:30 -06:00
|
|
|
push: true
|
2023-01-31 12:14:05 -07:00
|
|
|
platforms: 'linux/amd64'
|
2023-04-19 13:36:59 -06:00
|
|
|
build-args: |
|
2023-04-20 10:50:47 -06:00
|
|
|
GIT_SHA=${{ env.GITHUB_SHA }}
|
2023-05-02 07:43:19 -06:00
|
|
|
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}
|
2023-05-15 15:36:30 -06:00
|
|
|
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
|
|
|
|
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
|
2023-05-23 09:42:19 -06:00
|
|
|
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
|
|
|
|
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
|
2023-04-13 07:26:34 -06:00
|
|
|
|
2023-12-15 04:52:24 -07:00
|
|
|
integration-tests:
|
|
|
|
concurrency:
|
|
|
|
group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
|
|
|
|
cancel-in-progress: true
|
|
|
|
needs:
|
|
|
|
- start-runner
|
|
|
|
- build-and-push-image # Wait for the docker image to be built
|
|
|
|
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
|
|
|
|
env:
|
|
|
|
DOCKER_VOLUME: /cache
|
|
|
|
steps:
|
|
|
|
- uses: actions/checkout@v2
|
|
|
|
- name: Inject slug/short variables
|
|
|
|
uses: rlespinasse/github-slug-action@v4.4.1
|
|
|
|
- name: Set up Python
|
|
|
|
uses: actions/setup-python@v4
|
|
|
|
with:
|
|
|
|
python-version: 3.9
|
|
|
|
- name: Tailscale
|
|
|
|
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
|
|
|
|
with:
|
|
|
|
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
|
|
|
|
- name: Prepare disks
|
|
|
|
run: |
|
|
|
|
sudo mkfs -t ext4 /dev/nvme1n1
|
|
|
|
sudo mkdir ${{ env.DOCKER_VOLUME }}
|
|
|
|
sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
|
|
|
|
- name: Install
|
|
|
|
run: |
|
|
|
|
make install-integration-tests
|
|
|
|
- name: Run tests
|
|
|
|
run: |
|
|
|
|
export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
|
|
|
|
export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
|
|
|
pytest -s -vv integration-tests
|
|
|
|
|
2023-11-27 06:08:12 -07:00
|
|
|
build-and-push-image-rocm:
|
|
|
|
concurrency:
|
|
|
|
group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
|
|
|
|
cancel-in-progress: true
|
2023-12-15 04:52:24 -07:00
|
|
|
needs:
|
|
|
|
- start-runner
|
|
|
|
- build-and-push-image # Wait for the main docker image to be built
|
|
|
|
- integration-tests # Wait for the main integration-tests
|
2023-11-27 06:08:12 -07:00
|
|
|
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
|
|
|
|
permissions:
|
|
|
|
contents: write
|
|
|
|
packages: write
|
|
|
|
# This is used to complete the identity challenge
|
|
|
|
# with sigstore/fulcio when running outside of PRs.
|
|
|
|
id-token: write
|
|
|
|
security-events: write
|
|
|
|
steps:
|
|
|
|
- name: Checkout repository
|
|
|
|
uses: actions/checkout@v3
|
|
|
|
- name: Initialize Docker Buildx
|
|
|
|
uses: docker/setup-buildx-action@v2.0.0
|
|
|
|
with:
|
|
|
|
install: true
|
|
|
|
- name: Inject slug/short variables
|
|
|
|
uses: rlespinasse/github-slug-action@v4.4.1
|
|
|
|
- name: Tailscale
|
|
|
|
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
|
|
|
|
with:
|
|
|
|
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
|
|
|
|
- name: Login to GitHub Container Registry
|
|
|
|
if: github.event_name != 'pull_request'
|
|
|
|
uses: docker/login-action@v2
|
|
|
|
with:
|
|
|
|
registry: ghcr.io
|
|
|
|
username: ${{ github.actor }}
|
|
|
|
password: ${{ secrets.GITHUB_TOKEN }}
|
|
|
|
- name: Login to internal Container Registry
|
|
|
|
uses: docker/login-action@v2.1.0
|
|
|
|
with:
|
|
|
|
username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
|
|
|
|
password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
|
|
|
|
registry: registry.internal.huggingface.tech
|
|
|
|
- name: Login to Azure Container Registry
|
|
|
|
if: github.event_name != 'pull_request'
|
|
|
|
uses: docker/login-action@v2.1.0
|
|
|
|
with:
|
|
|
|
username: ${{ secrets.AZURE_DOCKER_USERNAME }}
|
|
|
|
password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
|
|
|
|
registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
|
|
|
|
# If pull request
|
|
|
|
- name: Extract metadata (tags, labels) for Docker
|
|
|
|
if: ${{ github.event_name == 'pull_request' }}
|
|
|
|
id: meta-pr
|
|
|
|
uses: docker/metadata-action@v4.3.0
|
|
|
|
with:
|
|
|
|
images: |
|
|
|
|
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
|
|
|
|
tags: |
|
|
|
|
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
|
|
|
|
# If main, release or tag
|
|
|
|
- name: Extract metadata (tags, labels) for Docker
|
|
|
|
if: ${{ github.event_name != 'pull_request' }}
|
|
|
|
id: meta
|
|
|
|
uses: docker/metadata-action@v4.3.0
|
|
|
|
with:
|
|
|
|
flavor: |
|
|
|
|
latest=false
|
|
|
|
images: |
|
|
|
|
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
|
|
|
|
ghcr.io/huggingface/text-generation-inference
|
|
|
|
db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
|
|
|
|
tags: |
|
|
|
|
type=semver,pattern={{version}}-rocm
|
|
|
|
type=semver,pattern={{major}}.{{minor}}-rocm
|
|
|
|
type=raw,value=latest-rocm,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
|
|
|
|
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
|
|
|
|
- name: Build and push Docker image
|
|
|
|
id: build-and-push
|
|
|
|
uses: docker/build-push-action@v4
|
|
|
|
with:
|
|
|
|
context: .
|
|
|
|
file: Dockerfile_amd
|
|
|
|
push: true
|
|
|
|
platforms: 'linux/amd64'
|
|
|
|
build-args: |
|
|
|
|
GIT_SHA=${{ env.GITHUB_SHA }}
|
|
|
|
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}-rocm
|
|
|
|
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
|
|
|
|
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
|
|
|
|
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
|
|
|
|
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
|
|
|
|
|
2024-04-26 07:48:58 -06:00
|
|
|
build-and-push-image-intel:
|
|
|
|
concurrency:
|
|
|
|
group: ${{ github.workflow }}-build-and-push-image-intel-${{ github.head_ref || github.run_id }}
|
|
|
|
cancel-in-progress: true
|
|
|
|
needs:
|
|
|
|
- start-runner
|
|
|
|
- build-and-push-image # Wait for the main docker image to be built
|
|
|
|
- integration-tests # Wait for the main integration-tests
|
|
|
|
runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
|
|
|
|
permissions:
|
|
|
|
contents: write
|
|
|
|
packages: write
|
|
|
|
# This is used to complete the identity challenge
|
|
|
|
# with sigstore/fulcio when running outside of PRs.
|
|
|
|
id-token: write
|
|
|
|
security-events: write
|
MI300 compatibility (#1764)
Adds support for AMD Instinct MI300 in TGI.
Most changes are:
* Support PyTorch TunableOp to pick the GEMM/GEMV kernels for decoding
https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/cuda/tunable.
TunableOp is disabled by default, and can be enabled with
`PYTORCH_TUNABLEOP_ENABLED=1`.
* Update ROCm dockerfile to PyTorch 2.3 (actually patched with changes
from https://github.com/pytorch/pytorch/pull/124362)
* Support SILU & Linear custom kernels contributed by AMD
* Update vLLM paged attention to https://github.com/fxmarty/rocm-vllm/,
branching out of a much more recent commit
https://github.com/ROCm/vllm/commit/3489ce7936c5de588916ae3047c44c23c0b0c308
* Support FA2 Triton kernel as recommended by AMD. Can be used by
specifying `ROCM_USE_FLASH_ATTN_V2_TRITON=1`.
* Update dockerfile to ROCm 6.1
By default, TunableOp tuning results are saved in `/data` (e.g.
`/data/tunableop_meta-llama-Llama-2-70b-chat-hf_tp1_rank0.csv`) in order
to avoid to have to rerun the tuning at each `docker run`.
Example:
```
Validator,PT_VERSION,2.3.0
Validator,ROCM_VERSION,6.1.0.0-82-5fabb4c
Validator,HIPBLASLT_VERSION,0.7.0-1549b021
Validator,GCN_ARCH_NAME,gfx942:sramecc+:xnack-
Validator,ROCBLAS_VERSION,4.1.0-cefa4a9b-dirty
GemmTunableOp_Half_TN,tn_8192_7_28672,Gemm_Rocblas_45475,0.132098
GemmTunableOp_Half_TN,tn_10240_4_8192,Gemm_Rocblas_45546,0.0484431
GemmTunableOp_Half_TN,tn_32000_6_8192,Default,0.149546
GemmTunableOp_Half_TN,tn_32000_3_8192,Gemm_Rocblas_45520,0.147119
GemmTunableOp_Half_TN,tn_8192_3_28672,Gemm_Rocblas_45475,0.132645
GemmTunableOp_Half_TN,tn_10240_3_8192,Gemm_Rocblas_45546,0.0482971
GemmTunableOp_Half_TN,tn_57344_5_8192,Gemm_Rocblas_45520,0.255694
GemmTunableOp_Half_TN,tn_10240_7_8192,Gemm_Rocblas_45517,0.0482522
GemmTunableOp_Half_TN,tn_8192_3_8192,Gemm_Rocblas_45546,0.0444671
GemmTunableOp_Half_TN,tn_8192_5_8192,Gemm_Rocblas_45546,0.0445834
GemmTunableOp_Half_TN,tn_57344_7_8192,Gemm_Rocblas_45520,0.25622
GemmTunableOp_Half_TN,tn_8192_2_28672,Gemm_Rocblas_45475,0.132122
GemmTunableOp_Half_TN,tn_8192_4_8192,Gemm_Rocblas_45517,0.0453191
GemmTunableOp_Half_TN,tn_10240_5_8192,Gemm_Rocblas_45517,0.0482514
GemmTunableOp_Half_TN,tn_8192_5_28672,Gemm_Rocblas_45542,0.133914
GemmTunableOp_Half_TN,tn_8192_2_8192,Gemm_Rocblas_45517,0.0446516
GemmTunableOp_Half_TN,tn_8192_1_28672,Gemm_Hipblaslt_TN_10814,0.131953
GemmTunableOp_Half_TN,tn_10240_2_8192,Gemm_Rocblas_45546,0.0481043
GemmTunableOp_Half_TN,tn_32000_4_8192,Gemm_Rocblas_45520,0.147497
GemmTunableOp_Half_TN,tn_8192_6_28672,Gemm_Rocblas_45529,0.134895
GemmTunableOp_Half_TN,tn_57344_2_8192,Gemm_Rocblas_45520,0.254716
GemmTunableOp_Half_TN,tn_57344_4_8192,Gemm_Rocblas_45520,0.255731
GemmTunableOp_Half_TN,tn_10240_6_8192,Gemm_Rocblas_45517,0.0484816
GemmTunableOp_Half_TN,tn_57344_3_8192,Gemm_Rocblas_45520,0.254701
GemmTunableOp_Half_TN,tn_8192_4_28672,Gemm_Rocblas_45475,0.132159
GemmTunableOp_Half_TN,tn_32000_2_8192,Default,0.147524
GemmTunableOp_Half_TN,tn_32000_5_8192,Default,0.147074
GemmTunableOp_Half_TN,tn_8192_6_8192,Gemm_Rocblas_45546,0.0454045
GemmTunableOp_Half_TN,tn_57344_6_8192,Gemm_Rocblas_45520,0.255582
GemmTunableOp_Half_TN,tn_32000_7_8192,Default,0.146705
GemmTunableOp_Half_TN,tn_8192_7_8192,Gemm_Rocblas_45546,0.0445489
```
---------
Co-authored-by: Mohit Sharma <mohit21sharma.ms@gmail.com>
2024-05-17 07:30:47 -06:00
|
|
|
outputs:
|
|
|
|
# env is not available in the later `container:`, but previous job outputs are.
|
|
|
|
short_sha: ${{ env.GITHUB_SHA_SHORT }}
|
2024-04-26 07:48:58 -06:00
|
|
|
steps:
|
|
|
|
- name: Checkout repository
|
|
|
|
uses: actions/checkout@v3
|
|
|
|
- name: Initialize Docker Buildx
|
|
|
|
uses: docker/setup-buildx-action@v2.0.0
|
|
|
|
with:
|
|
|
|
install: true
|
|
|
|
- name: Inject slug/short variables
|
|
|
|
uses: rlespinasse/github-slug-action@v4.4.1
|
|
|
|
- name: Tailscale
|
|
|
|
uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
|
|
|
|
with:
|
|
|
|
authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
|
|
|
|
- name: Login to GitHub Container Registry
|
|
|
|
if: github.event_name != 'pull_request'
|
|
|
|
uses: docker/login-action@v2
|
|
|
|
with:
|
|
|
|
registry: ghcr.io
|
|
|
|
username: ${{ github.actor }}
|
|
|
|
password: ${{ secrets.GITHUB_TOKEN }}
|
|
|
|
- name: Login to internal Container Registry
|
|
|
|
uses: docker/login-action@v2.1.0
|
|
|
|
with:
|
|
|
|
username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
|
|
|
|
password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
|
|
|
|
registry: registry.internal.huggingface.tech
|
|
|
|
- name: Login to Azure Container Registry
|
|
|
|
if: github.event_name != 'pull_request'
|
|
|
|
uses: docker/login-action@v2.1.0
|
|
|
|
with:
|
|
|
|
username: ${{ secrets.AZURE_DOCKER_USERNAME }}
|
|
|
|
password: ${{ secrets.AZURE_DOCKER_PASSWORD }}
|
|
|
|
registry: db4c2190dd824d1f950f5d1555fbadf0.azurecr.io
|
|
|
|
# If pull request
|
|
|
|
- name: Extract metadata (tags, labels) for Docker
|
|
|
|
if: ${{ github.event_name == 'pull_request' }}
|
|
|
|
id: meta-pr
|
|
|
|
uses: docker/metadata-action@v4.3.0
|
|
|
|
with:
|
|
|
|
images: |
|
|
|
|
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
|
|
|
|
tags: |
|
|
|
|
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-intel
|
|
|
|
# If main, release or tag
|
|
|
|
- name: Extract metadata (tags, labels) for Docker
|
|
|
|
if: ${{ github.event_name != 'pull_request' }}
|
|
|
|
id: meta
|
|
|
|
uses: docker/metadata-action@v4.3.0
|
|
|
|
with:
|
|
|
|
flavor: |
|
|
|
|
latest=false
|
|
|
|
images: |
|
|
|
|
registry.internal.huggingface.tech/api-inference/community/text-generation-inference
|
|
|
|
ghcr.io/huggingface/text-generation-inference
|
|
|
|
db4c2190dd824d1f950f5d1555fbadf0.azurecr.io/text-generation-inference
|
|
|
|
tags: |
|
|
|
|
type=semver,pattern={{version}}-intel
|
|
|
|
type=semver,pattern={{major}}.{{minor}}-intel
|
|
|
|
type=raw,value=latest-intel,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
|
|
|
|
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}-intel
|
|
|
|
- name: Build and push Docker image
|
|
|
|
id: build-and-push
|
|
|
|
uses: docker/build-push-action@v4
|
|
|
|
with:
|
|
|
|
context: .
|
|
|
|
file: Dockerfile_intel
|
|
|
|
push: true
|
|
|
|
platforms: 'linux/amd64'
|
|
|
|
build-args: |
|
|
|
|
GIT_SHA=${{ env.GITHUB_SHA }}
|
|
|
|
DOCKER_LABEL=sha-${{ env.GITHUB_SHA_SHORT }}-intel
|
|
|
|
tags: ${{ steps.meta.outputs.tags || steps.meta-pr.outputs.tags }}
|
|
|
|
labels: ${{ steps.meta.outputs.labels || steps.meta-pr.outputs.labels }}
|
|
|
|
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-intel,mode=min
|
|
|
|
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-intel,mode=min
|
|
|
|
|
2023-05-15 07:53:08 -06:00
|
|
|
stop-runner:
|
|
|
|
name: Stop self-hosted EC2 runner
|
2023-03-29 13:38:30 -06:00
|
|
|
needs:
|
2023-05-15 07:53:08 -06:00
|
|
|
- start-runner
|
2023-03-29 13:38:30 -06:00
|
|
|
- build-and-push-image
|
2023-11-27 06:08:12 -07:00
|
|
|
- build-and-push-image-rocm
|
2024-04-26 07:48:58 -06:00
|
|
|
- build-and-push-image-intel
|
2023-05-15 15:36:30 -06:00
|
|
|
- integration-tests
|
2023-04-14 02:12:21 -06:00
|
|
|
runs-on: ubuntu-latest
|
2023-05-15 07:53:08 -06:00
|
|
|
env:
|
|
|
|
AWS_REGION: us-east-1
|
|
|
|
if: ${{ always() }} # required to stop the runner even if the error happened in the previous jobs
|
2023-03-29 13:38:30 -06:00
|
|
|
steps:
|
2023-05-15 07:53:08 -06:00
|
|
|
- name: Configure AWS credentials
|
|
|
|
uses: aws-actions/configure-aws-credentials@v1
|
|
|
|
with:
|
|
|
|
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
|
|
|
|
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
|
|
|
|
aws-region: ${{ env.AWS_REGION }}
|
|
|
|
- name: Stop EC2 runner
|
|
|
|
uses: philschmid/philschmid-ec2-github-runner@main
|
|
|
|
with:
|
|
|
|
mode: stop
|
|
|
|
github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
|
|
|
|
label: ${{ needs.start-runner.outputs.label }}
|
2023-05-23 08:49:11 -06:00
|
|
|
ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}
|
MI300 compatibility (#1764)
Adds support for AMD Instinct MI300 in TGI.
Most changes are:
* Support PyTorch TunableOp to pick the GEMM/GEMV kernels for decoding
https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/cuda/tunable.
TunableOp is disabled by default, and can be enabled with
`PYTORCH_TUNABLEOP_ENABLED=1`.
* Update ROCm dockerfile to PyTorch 2.3 (actually patched with changes
from https://github.com/pytorch/pytorch/pull/124362)
* Support SILU & Linear custom kernels contributed by AMD
* Update vLLM paged attention to https://github.com/fxmarty/rocm-vllm/,
branching out of a much more recent commit
https://github.com/ROCm/vllm/commit/3489ce7936c5de588916ae3047c44c23c0b0c308
* Support FA2 Triton kernel as recommended by AMD. Can be used by
specifying `ROCM_USE_FLASH_ATTN_V2_TRITON=1`.
* Update dockerfile to ROCm 6.1
By default, TunableOp tuning results are saved in `/data` (e.g.
`/data/tunableop_meta-llama-Llama-2-70b-chat-hf_tp1_rank0.csv`) in order
to avoid to have to rerun the tuning at each `docker run`.
Example:
```
Validator,PT_VERSION,2.3.0
Validator,ROCM_VERSION,6.1.0.0-82-5fabb4c
Validator,HIPBLASLT_VERSION,0.7.0-1549b021
Validator,GCN_ARCH_NAME,gfx942:sramecc+:xnack-
Validator,ROCBLAS_VERSION,4.1.0-cefa4a9b-dirty
GemmTunableOp_Half_TN,tn_8192_7_28672,Gemm_Rocblas_45475,0.132098
GemmTunableOp_Half_TN,tn_10240_4_8192,Gemm_Rocblas_45546,0.0484431
GemmTunableOp_Half_TN,tn_32000_6_8192,Default,0.149546
GemmTunableOp_Half_TN,tn_32000_3_8192,Gemm_Rocblas_45520,0.147119
GemmTunableOp_Half_TN,tn_8192_3_28672,Gemm_Rocblas_45475,0.132645
GemmTunableOp_Half_TN,tn_10240_3_8192,Gemm_Rocblas_45546,0.0482971
GemmTunableOp_Half_TN,tn_57344_5_8192,Gemm_Rocblas_45520,0.255694
GemmTunableOp_Half_TN,tn_10240_7_8192,Gemm_Rocblas_45517,0.0482522
GemmTunableOp_Half_TN,tn_8192_3_8192,Gemm_Rocblas_45546,0.0444671
GemmTunableOp_Half_TN,tn_8192_5_8192,Gemm_Rocblas_45546,0.0445834
GemmTunableOp_Half_TN,tn_57344_7_8192,Gemm_Rocblas_45520,0.25622
GemmTunableOp_Half_TN,tn_8192_2_28672,Gemm_Rocblas_45475,0.132122
GemmTunableOp_Half_TN,tn_8192_4_8192,Gemm_Rocblas_45517,0.0453191
GemmTunableOp_Half_TN,tn_10240_5_8192,Gemm_Rocblas_45517,0.0482514
GemmTunableOp_Half_TN,tn_8192_5_28672,Gemm_Rocblas_45542,0.133914
GemmTunableOp_Half_TN,tn_8192_2_8192,Gemm_Rocblas_45517,0.0446516
GemmTunableOp_Half_TN,tn_8192_1_28672,Gemm_Hipblaslt_TN_10814,0.131953
GemmTunableOp_Half_TN,tn_10240_2_8192,Gemm_Rocblas_45546,0.0481043
GemmTunableOp_Half_TN,tn_32000_4_8192,Gemm_Rocblas_45520,0.147497
GemmTunableOp_Half_TN,tn_8192_6_28672,Gemm_Rocblas_45529,0.134895
GemmTunableOp_Half_TN,tn_57344_2_8192,Gemm_Rocblas_45520,0.254716
GemmTunableOp_Half_TN,tn_57344_4_8192,Gemm_Rocblas_45520,0.255731
GemmTunableOp_Half_TN,tn_10240_6_8192,Gemm_Rocblas_45517,0.0484816
GemmTunableOp_Half_TN,tn_57344_3_8192,Gemm_Rocblas_45520,0.254701
GemmTunableOp_Half_TN,tn_8192_4_28672,Gemm_Rocblas_45475,0.132159
GemmTunableOp_Half_TN,tn_32000_2_8192,Default,0.147524
GemmTunableOp_Half_TN,tn_32000_5_8192,Default,0.147074
GemmTunableOp_Half_TN,tn_8192_6_8192,Gemm_Rocblas_45546,0.0454045
GemmTunableOp_Half_TN,tn_57344_6_8192,Gemm_Rocblas_45520,0.255582
GemmTunableOp_Half_TN,tn_32000_7_8192,Default,0.146705
GemmTunableOp_Half_TN,tn_8192_7_8192,Gemm_Rocblas_45546,0.0445489
```
---------
Co-authored-by: Mohit Sharma <mohit21sharma.ms@gmail.com>
2024-05-17 07:30:47 -06:00
|
|
|
|
2024-05-17 11:50:52 -06:00
|
|
|
# TODO: Move this to `build_amd.yml` (and `build_nvidia.yml`)
|
|
|
|
|
|
|
|
# integration-tests-rocm:
|
|
|
|
# concurrency:
|
|
|
|
# group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
|
|
|
|
# cancel-in-progress: true
|
|
|
|
# needs:
|
|
|
|
# - start-runner
|
|
|
|
# - build-and-push-image
|
|
|
|
# - integration-tests
|
|
|
|
# - build-and-push-image-rocm
|
|
|
|
# - stop-runner
|
|
|
|
# runs-on: [self-hosted, amd-gpu, multi-gpu, mi300]
|
|
|
|
# container:
|
|
|
|
# image: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ needs.build-and-push-image-rocm.outputs.short_sha }}-rocm
|
|
|
|
# options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/cache
|
|
|
|
# env:
|
|
|
|
# DOCKER_VOLUME: /cache
|
|
|
|
# steps:
|
|
|
|
# - name: ROCM-SMI
|
|
|
|
# run: |
|
|
|
|
# rocm-smi
|
|
|
|
# - name: ROCM-INFO
|
|
|
|
# run: |
|
|
|
|
# rocminfo | grep "Agent" -A 14
|
|
|
|
# - name: Show ROCR environment
|
|
|
|
# run: |
|
|
|
|
# echo "ROCR: $ROCR_VISIBLE_DEVICES"
|
|
|
|
# - name: Install
|
|
|
|
# run: |
|
|
|
|
# make install-integration-tests
|
|
|
|
# - name: Run tests
|
|
|
|
# run: |
|
|
|
|
# export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
|
|
|
|
# pytest -s -vv integration-tests
|