From b6ee0ec7b06b11ad35fe2f2d99318ddc47f95558 Mon Sep 17 00:00:00 2001
From: OlivierDehaene
Date: Wed, 19 Apr 2023 21:36:59 +0200
Subject: [PATCH] feat(router): add git sha to info route (#208)

---
 .github/workflows/build.yaml                  |  4 ++++
 Dockerfile                                    |  2 ++
 router/build.rs                               | 14 +++++++++++++-
 .../text_generation_server/models/__init__.py | 22 ++++++++++++++--------
 4 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index aa7db30a..fe7cb706 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -95,6 +95,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA=${{ env.GITHUB_SHA }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
@@ -176,6 +178,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA=${{ env.GITHUB_SHA }}
           target: sagemaker
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
diff --git a/Dockerfile b/Dockerfile
index 61f37ed9..a679db7e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,6 +12,8 @@ RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
 
+ARG GIT_SHA
+
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
diff --git a/router/build.rs b/router/build.rs
index c34f9fa8..1b1fdc86 100644
--- a/router/build.rs
+++ b/router/build.rs
@@ -2,6 +2,18 @@ use std::error::Error;
 use vergen::EmitBuilder;
 
 fn main() -> Result<(), Box<dyn Error>> {
-    EmitBuilder::builder().git_sha(false).emit()?;
+    // Try to get the git sha from the local git repository
+    if EmitBuilder::builder()
+        .fail_on_error()
+        .git_sha(false)
+        .emit()
+        .is_err()
+    {
+        // Unable to get the git sha
+        if let Ok(sha) = std::env::var("GIT_SHA") {
+            // Set it from an env var
+            println!("cargo:rustc-env=VERGEN_GIT_SHA={sha}");
+        }
+    }
     Ok(())
 }
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 0a29b3cc..74a7483e 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -17,13 +17,6 @@ from text_generation_server.models.gpt_neox import GPTNeoxSharded
 from text_generation_server.models.t5 import T5Sharded
 
 try:
-    from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
-    from text_generation_server.models.flash_llama import FlashLlama, FlashLlamaSharded
-    from text_generation_server.models.flash_santacoder import (
-        FlashSantacoder,
-        FlashSantacoderSharded,
-    )
-
     if torch.cuda.is_available():
         major, minor = torch.cuda.get_device_capability()
         is_sm75 = major == 7 and minor == 5
@@ -32,7 +25,20 @@ try:
 
         supported = is_sm75 or is_sm8x or is_sm90
         if not supported:
-            raise ImportError(f"GPU with CUDA capability {major} {minor} is not supported")
+            raise ImportError(
+                f"GPU with CUDA capability {major} {minor} is not supported"
+            )
+
+        from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
+        from text_generation_server.models.flash_llama import (
+            FlashLlama,
+            FlashLlamaSharded,
+        )
+        from text_generation_server.models.flash_santacoder import (
+            FlashSantacoder,
+            FlashSantacoderSharded,
+        )
+
         FLASH_ATTENTION = True
     else:
         FLASH_ATTENTION = False
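
Taken together, the changes plumb the commit sha through each layer that would otherwise lose it: the workflow forwards GITHUB_SHA as a Docker build argument, the Dockerfile's `ARG GIT_SHA` exposes it to `cargo build`, and build.rs bakes it into the binary as VERGEN_GIT_SHA whenever vergen cannot probe a local git repository, which is exactly the situation inside the Docker build context (no .git directory is copied in). The router-side read is not part of this diff; the sketch below shows one way the embedded value can surface in the info route. The `Info` struct and its field names are illustrative assumptions, not code from the repository:

    use serde::Serialize;

    /// Hypothetical response body for the info route.
    #[derive(Serialize)]
    pub struct Info {
        /// Crate version, always available through Cargo.
        pub version: &'static str,
        /// Git sha; None when neither a git checkout nor GIT_SHA was available.
        pub sha: Option<&'static str>,
    }

    /// `option_env!` is resolved at compile time: it yields Some(sha) when
    /// build.rs emitted `cargo:rustc-env=VERGEN_GIT_SHA=...` and None
    /// otherwise, so a missing sha degrades gracefully instead of breaking
    /// the build the way `env!` would.
    pub fn build_info() -> Info {
        Info {
            version: env!("CARGO_PKG_VERSION"),
            sha: option_env!("VERGEN_GIT_SHA"),
        }
    }

For images built outside CI, the same fallback can be exercised by hand, e.g. `docker build --build-arg GIT_SHA=$(git rev-parse HEAD) .`.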
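
One subtlety in the build.rs change: `emit()` only returns an error when `fail_on_error()` is set; vergen's default is to emit placeholder defaults instead of failing, which would keep the GIT_SHA fallback branch from ever running. The resulting precedence is "git checkout first, build argument second, unset otherwise", which can be modeled as a small pure function. This is an illustrative sketch under those assumptions, not code from the repository:

    /// Model of the sha-resolution precedence in build.rs: a sha probed from
    /// a local git checkout wins, an externally supplied GIT_SHA is the
    /// fallback, and the result stays unset when neither source exists.
    fn resolve_sha(from_git: Option<String>, from_env: Option<String>) -> Option<String> {
        from_git.or(from_env)
    }

    #[cfg(test)]
    mod tests {
        use super::resolve_sha;

        #[test]
        fn docker_builds_use_the_build_argument() {
            // No .git directory inside the Docker build context, so the
            // git probe yields nothing and the build argument takes over.
            let sha = resolve_sha(None, Some("b6ee0ec".to_string()));
            assert_eq!(sha.as_deref(), Some("b6ee0ec"));
        }

        #[test]
        fn local_checkouts_win_over_the_env_var() {
            // "1a2b3c4" is a made-up sha standing in for `git rev-parse HEAD`.
            let sha = resolve_sha(Some("1a2b3c4".to_string()), Some("b6ee0ec".to_string()));
            assert_eq!(sha.as_deref(), Some("1a2b3c4"));
        }
    }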