feat(router): add git sha to info route (#208)
commit b6ee0ec7b0
parent 252f42c1e6

@@ -95,6 +95,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA={{ env.GITHUB_SHA }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max

@@ -176,6 +178,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA={{ env.GITHUB_SHA }}
           target: sagemaker
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
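
Both Docker build jobs now forward the workflow's commit sha into the image build through the GIT_SHA build argument. The intent, implemented by the build.rs change further down, is a two-step resolution: use the local git checkout when one is available, otherwise fall back to the variable provided here. A rough sketch of that order in Python, for illustration only (the helper name is hypothetical):

    import os
    import subprocess

    def resolve_sha() -> str | None:
        """Mirror the resolution order this commit implements."""
        try:
            # Prefer the sha of the local git checkout, as vergen does.
            out = subprocess.run(
                ["git", "rev-parse", "HEAD"],
                capture_output=True, text=True, check=True,
            )
            return out.stdout.strip()
        except (OSError, subprocess.CalledProcessError):
            # No usable repository: fall back to the Docker build argument.
            return os.environ.get("GIT_SHA")
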
@@ -12,6 +12,8 @@ RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
 
+ARG GIT_SHA
+
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \

@@ -2,6 +2,18 @@ use std::error::Error;
 use vergen::EmitBuilder;
 
 fn main() -> Result<(), Box<dyn Error>> {
-    EmitBuilder::builder().git_sha(false).emit()?;
+    // Try to get the git sha from the local git repository
+    if EmitBuilder::builder()
+        .fail_on_error()
+        .git_sha(false)
+        .emit()
+        .is_err()
+    {
+        // Unable to get the git sha
+        if let Ok(sha) = std::env::var("GIT_SHA") {
+            // Set it from an env var
+            println!("cargo:rustc-env=VERGEN_GIT_SHA={sha}");
+        }
+    }
     Ok(())
 }
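
With this fallback in place, the sha is baked into the router binary as VERGEN_GIT_SHA at compile time, and the info route can report it even when the image was built outside a git checkout. A hedged client-side check; the base URL and the "sha" field name are assumptions for illustration, not taken from this diff:

    import json
    from urllib.request import urlopen

    # Assumed endpoint shape: GET /info returns JSON including a "sha" field.
    with urlopen("http://localhost:8080/info") as resp:
        info = json.load(resp)
    print(info.get("sha", "unknown"))
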
@@ -17,13 +17,6 @@ from text_generation_server.models.gpt_neox import GPTNeoxSharded
 from text_generation_server.models.t5 import T5Sharded
 
 try:
-    from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
-    from text_generation_server.models.flash_llama import FlashLlama, FlashLlamaSharded
-    from text_generation_server.models.flash_santacoder import (
-        FlashSantacoder,
-        FlashSantacoderSharded,
-    )
-
     if torch.cuda.is_available():
         major, minor = torch.cuda.get_device_capability()
         is_sm75 = major == 7 and minor == 5

@@ -32,7 +25,20 @@ try:
 
         supported = is_sm75 or is_sm8x or is_sm90
         if not supported:
-            raise ImportError(f"GPU with CUDA capability {major} {minor} is not supported")
+            raise ImportError(
+                f"GPU with CUDA capability {major} {minor} is not supported"
+            )
+
+        from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
+        from text_generation_server.models.flash_llama import (
+            FlashLlama,
+            FlashLlamaSharded,
+        )
+        from text_generation_server.models.flash_santacoder import (
+            FlashSantacoder,
+            FlashSantacoderSharded,
+        )
+
         FLASH_ATTENTION = True
     else:
         FLASH_ATTENTION = False
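
The server-side change above defers the flash attention imports until after the compute-capability check, so an unsupported GPU now falls through to FLASH_ATTENTION = False instead of failing on a kernel import. A standalone restatement of the gate; the is_sm8x and is_sm90 definitions are assumed from the surrounding code, which this hunk elides:

    import torch

    def flash_attention_supported() -> bool:
        """Illustrative: flash attention only on sm75 / sm8x / sm90 GPUs."""
        if not torch.cuda.is_available():
            return False
        major, minor = torch.cuda.get_device_capability()
        is_sm75 = major == 7 and minor == 5
        is_sm8x = major == 8  # assumed: any Ampere-class minor revision
        is_sm90 = major == 9 and minor == 0
        return is_sm75 or is_sm8x or is_sm90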