feat: aws sagemaker compatible image (#147)

The only difference is that now it pushes to
registry.internal.huggingface.tech/api-inference/community/text-generation-inference/sagemaker:...
instead of
registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sagemaker-...

---------

Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
This commit is contained in:
OlivierDehaene 2023-03-29 21:38:30 +02:00 committed by GitHub
parent c9bdaa8b73
commit d503e8f09d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 86 additions and 3 deletions

View File

@ -84,3 +84,47 @@ jobs:
labels: ${{ steps.meta.outputs.labels }} labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
# Job: build the `sagemaker` Dockerfile target and push it to the dedicated
# .../text-generation-inference/sagemaker image repository.
# It is ordered after `build-and-push-image` through `needs`.
build-and-push-sagemaker-image:
needs:
- build-and-push-image
runs-on: ubuntu-latest
steps:
# Buildx is installed as the default builder (`install: true`).
- name: Initialize Docker Buildx
uses: docker/setup-buildx-action@v2.0.0
with:
install: true
- name: Checkout repository
uses: actions/checkout@v3
# NOTE(review): presumably this step is what exports GITHUB_SHA_SHORT, which the
# metadata step below reads through `env.` - confirm against the action's docs.
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
# NOTE(review): the registry host is internal and no VPN/network setup step is
# visible in this job - confirm the hosted runner can actually reach it.
- name: Login to internal Container Registry
uses: docker/login-action@v2.1.0
with:
username: ${{ secrets.TAILSCALE_DOCKER_USERNAME }}
password: ${{ secrets.TAILSCALE_DOCKER_PASSWORD }}
registry: registry.internal.huggingface.tech
# Computes the tags/labels consumed by the build step via `steps.meta.outputs`.
# Tag rules: full semver version, `major.minor`, `latest` only when the ref is the
# repository default branch, and `sha-<short sha>`.
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v4.3.0
with:
flavor: |
latest=auto
images: |
registry.internal.huggingface.tech/api-inference/community/text-generation-inference/sagemaker
tags: |
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=sha-${{ env.GITHUB_SHA_SHORT }}
# Builds the `sagemaker` stage for linux/amd64 and pushes on every event except
# pull requests. Only `cache-from` is configured: this job reads the layer cache
# stored under the main image's `:cache` ref and does not write one back.
# NOTE(review): `mode=max` is documented as a cache export (`cache-to`) option;
# confirm whether it is intended on `cache-from`.
- name: Build and push Docker image
uses: docker/build-push-action@v2
with:
context: .
file: Dockerfile
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
target: sagemaker
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max

View File

@ -27,7 +27,7 @@ COPY router router
COPY launcher launcher COPY launcher launcher
RUN cargo build --release RUN cargo build --release
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 FROM nvidia/cuda:11.8.0-devel-ubuntu22.04 as base
ENV LANG=C.UTF-8 \ ENV LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \ LC_ALL=C.UTF-8 \
@ -76,5 +76,16 @@ COPY --from=builder /usr/src/target/release/text-generation-router /usr/local/bi
# Install launcher # Install launcher
COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/bin/text-generation-launcher
# AWS SageMaker compatible image
# Named stage built on top of the shared `base` stage; select it with `--target sagemaker`.
FROM base as sagemaker
# The wrapper script is copied under the name `entrypoint.sh` and made executable.
COPY sagemaker-entrypoint.sh entrypoint.sh
RUN chmod +x entrypoint.sh
# NOTE(review): relative path, so it is resolved against the working directory
# inherited from `base` (WORKDIR is set outside this hunk) - confirm.
ENTRYPOINT ["./entrypoint.sh"]
# Original image
# Unnamed stage on top of `base`: starts the launcher directly, with
# `--json-output` as the default (overridable) argument.
FROM base
ENTRYPOINT ["text-generation-launcher"] ENTRYPOINT ["text-generation-launcher"]
CMD ["--json-output"] CMD ["--json-output"]

View File

@ -529,11 +529,19 @@ pub async fn run(
// Create router // Create router
let app = Router::new() let app = Router::new()
.merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi())) .merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi()))
// Base routes
.route("/", post(compat_generate)) .route("/", post(compat_generate))
.route("/generate", post(generate)) .route("/generate", post(generate))
.route("/generate_stream", post(generate_stream)) .route("/generate_stream", post(generate_stream))
.route("/", get(health)) // AWS Sagemaker route
.route("/invocations", post(compat_generate))
// Base Health route
.route("/health", get(health)) .route("/health", get(health))
// Inference API health route
.route("/", get(health))
// AWS Sagemaker health route
.route("/ping", get(health))
// Prometheus metrics route
.route("/metrics", get(metrics)) .route("/metrics", get(metrics))
.layer(Extension(compat_return_full_text)) .layer(Extension(compat_return_full_text))
.layer(Extension(infer)) .layer(Extension(infer))

20
sagemaker-entrypoint.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
# Entrypoint for the SageMaker image.
#
# Translates the HF_* / SM_* environment variables into the variable names
# exported for text-generation-launcher, then starts the launcher on port 8080.
#
#   HF_MODEL_ID        (required) -> MODEL_ID
#   HF_MODEL_REVISION  (optional) -> REVISION
#   SM_NUM_GPUS        (optional) -> NUM_SHARD
#   HF_MODEL_QUANTIZE  (optional) -> QUANTIZE
#
# Exits with status 1 when HF_MODEL_ID is unset or empty.

if [[ -z "${HF_MODEL_ID}" ]]; then
    # Diagnostics belong on stderr, not stdout.
    echo "HF_MODEL_ID must be set" >&2
    exit 1
fi
# Forward the required model id the same way as the optional settings below;
# previously it was validated but never passed on, so it had no effect.
# NOTE(review): assumes the launcher reads MODEL_ID from the environment, like
# REVISION / NUM_SHARD / QUANTIZE - confirm against the launcher's options.
export MODEL_ID="${HF_MODEL_ID}"

if [[ -n "${HF_MODEL_REVISION}" ]]; then
    export REVISION="${HF_MODEL_REVISION}"
fi

if [[ -n "${SM_NUM_GPUS}" ]]; then
    export NUM_SHARD="${SM_NUM_GPUS}"
fi

if [[ -n "${HF_MODEL_QUANTIZE}" ]]; then
    export QUANTIZE="${HF_MODEL_QUANTIZE}"
fi

# `exec` replaces the shell with the launcher so that signals sent to the
# container's main process reach the launcher directly instead of bash.
exec text-generation-launcher --port 8080