feat(router): add git sha to info route (#208)
commit b6ee0ec7b0
parent 252f42c1e6

@@ -95,6 +95,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA={{ env.GITHUB_SHA }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max

@@ -176,6 +178,8 @@ jobs:
           file: Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           platforms: 'linux/amd64'
+          build-args: |
+            GIT_SHA={{ env.GITHUB_SHA }}
           target: sagemaker
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
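
Both Docker build jobs now forward the workflow's commit sha into the image build through the GIT_SHA build argument. The intent, implemented by the build.rs change further down, is a two-step resolution: use the local git checkout when one is available, otherwise fall back to the variable provided here. A rough sketch of that order in Python, for illustration only (the helper name is hypothetical):

    import os
    import subprocess

    def resolve_sha() -> str | None:
        """Mirror the resolution order this commit implements."""
        try:
            # Prefer the sha of the local git checkout, as vergen does.
            out = subprocess.run(
                ["git", "rev-parse", "HEAD"],
                capture_output=True, text=True, check=True,
            )
            return out.stdout.strip()
        except (OSError, subprocess.CalledProcessError):
            # No usable repository: fall back to the Docker build argument.
            return os.environ.get("GIT_SHA")
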
@@ -12,6 +12,8 @@ RUN cargo chef prepare --recipe-path recipe.json
 
 FROM chef AS builder
 
+ARG GIT_SHA
+
 RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
     curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
     unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \

@@ -2,6 +2,18 @@ use std::error::Error;
 use vergen::EmitBuilder;
 
 fn main() -> Result<(), Box<dyn Error>> {
-    EmitBuilder::builder().git_sha(false).emit()?;
+    // Try to get the git sha from the local git repository
+    if EmitBuilder::builder()
+        .fail_on_error()
+        .git_sha(false)
+        .emit()
+        .is_err()
+    {
+        // Unable to get the git sha
+        if let Ok(sha) = std::env::var("GIT_SHA") {
+            // Set it from an env var
+            println!("cargo:rustc-env=VERGEN_GIT_SHA={sha}");
+        }
+    }
     Ok(())
 }
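
With this fallback in place, the sha is baked into the router binary as VERGEN_GIT_SHA at compile time, and the info route can report it even when the image was built outside a git checkout. A hedged client-side check; the base URL and the "sha" field name are assumptions for illustration, not taken from this diff:

    import json
    from urllib.request import urlopen

    # Assumed endpoint shape: GET /info returns JSON including a "sha" field.
    with urlopen("http://localhost:8080/info") as resp:
        info = json.load(resp)
    print(info.get("sha", "unknown"))
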
@@ -17,13 +17,6 @@ from text_generation_server.models.gpt_neox import GPTNeoxSharded
 from text_generation_server.models.t5 import T5Sharded
 
 try:
-    from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
-    from text_generation_server.models.flash_llama import FlashLlama, FlashLlamaSharded
-    from text_generation_server.models.flash_santacoder import (
-        FlashSantacoder,
-        FlashSantacoderSharded,
-    )
-
     if torch.cuda.is_available():
         major, minor = torch.cuda.get_device_capability()
         is_sm75 = major == 7 and minor == 5

@@ -32,7 +25,20 @@ try:
 
         supported = is_sm75 or is_sm8x or is_sm90
         if not supported:
-            raise ImportError(f"GPU with CUDA capability {major} {minor} is not supported")
+            raise ImportError(
+                f"GPU with CUDA capability {major} {minor} is not supported"
+            )
+
+        from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
+        from text_generation_server.models.flash_llama import (
+            FlashLlama,
+            FlashLlamaSharded,
+        )
+        from text_generation_server.models.flash_santacoder import (
+            FlashSantacoder,
+            FlashSantacoderSharded,
+        )
+
         FLASH_ATTENTION = True
     else:
         FLASH_ATTENTION = False
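
The server-side change above defers the flash attention imports until after the compute-capability check, so an unsupported GPU now falls through to FLASH_ATTENTION = False instead of failing on a kernel import. A standalone restatement of the gate; the is_sm8x and is_sm90 definitions are assumed from the surrounding code, which this hunk elides:

    import torch

    def flash_attention_supported() -> bool:
        """Illustrative: flash attention only on sm75 / sm8x / sm90 GPUs."""
        if not torch.cuda.is_available():
            return False
        major, minor = torch.cuda.get_device_capability()
        is_sm75 = major == 7 and minor == 5
        is_sm8x = major == 8  # assumed: any Ampere-class minor revision
        is_sm90 = major == 9 and minor == 0
        return is_sm75 or is_sm8x or is_sm90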