feat(router): add git sha to info route (#208)
This commit is contained in:
parent
252f42c1e6
commit
b6ee0ec7b0
|
@@ -95,6 +95,8 @@ jobs:
|
||||||
file: Dockerfile
|
file: Dockerfile
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
|
build-args: |
|
||||||
|
GIT_SHA=${{ env.GITHUB_SHA }}
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
|
cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=max
|
||||||
|
@@ -176,6 +178,8 @@ jobs:
|
||||||
file: Dockerfile
|
file: Dockerfile
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
platforms: 'linux/amd64'
|
platforms: 'linux/amd64'
|
||||||
|
build-args: |
|
||||||
|
GIT_SHA=${{ env.GITHUB_SHA }}
|
||||||
target: sagemaker
|
target: sagemaker
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
|
|
|
@@ -12,6 +12,8 @@ RUN cargo chef prepare --recipe-path recipe.json
|
||||||
|
|
||||||
FROM chef AS builder
|
FROM chef AS builder
|
||||||
|
|
||||||
|
ARG GIT_SHA
|
||||||
|
|
||||||
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
|
||||||
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
|
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
|
||||||
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
|
unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
|
||||||
|
|
|
@@ -2,6 +2,18 @@ use std::error::Error;
|
||||||
use vergen::EmitBuilder;
|
use vergen::EmitBuilder;
|
||||||
|
|
||||||
fn main() -> Result<(), Box<dyn Error>> {
|
fn main() -> Result<(), Box<dyn Error>> {
|
||||||
EmitBuilder::builder().git_sha(false).emit()?;
|
// Try to get the git sha from the local git repository
|
||||||
|
if EmitBuilder::builder()
|
||||||
|
.fail_on_error()
|
||||||
|
.git_sha(false)
|
||||||
|
.emit()
|
||||||
|
.is_err()
|
||||||
|
{
|
||||||
|
// Unable to get the git sha
|
||||||
|
if let Ok(sha) = std::env::var("GIT_SHA") {
|
||||||
|
// Set it from an env var
|
||||||
|
println!("cargo:rustc-env=VERGEN_GIT_SHA={sha}");
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@@ -17,13 +17,6 @@ from text_generation_server.models.gpt_neox import GPTNeoxSharded
|
||||||
from text_generation_server.models.t5 import T5Sharded
|
from text_generation_server.models.t5 import T5Sharded
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
|
|
||||||
from text_generation_server.models.flash_llama import FlashLlama, FlashLlamaSharded
|
|
||||||
from text_generation_server.models.flash_santacoder import (
|
|
||||||
FlashSantacoder,
|
|
||||||
FlashSantacoderSharded,
|
|
||||||
)
|
|
||||||
|
|
||||||
if torch.cuda.is_available():
|
if torch.cuda.is_available():
|
||||||
major, minor = torch.cuda.get_device_capability()
|
major, minor = torch.cuda.get_device_capability()
|
||||||
is_sm75 = major == 7 and minor == 5
|
is_sm75 = major == 7 and minor == 5
|
||||||
|
@@ -32,7 +25,20 @@ try:
|
||||||
|
|
||||||
supported = is_sm75 or is_sm8x or is_sm90
|
supported = is_sm75 or is_sm8x or is_sm90
|
||||||
if not supported:
|
if not supported:
|
||||||
raise ImportError(f"GPU with CUDA capability {major} {minor} is not supported")
|
raise ImportError(
|
||||||
|
f"GPU with CUDA capability {major} {minor} is not supported"
|
||||||
|
)
|
||||||
|
|
||||||
|
from text_generation_server.models.flash_neox import FlashNeoX, FlashNeoXSharded
|
||||||
|
from text_generation_server.models.flash_llama import (
|
||||||
|
FlashLlama,
|
||||||
|
FlashLlamaSharded,
|
||||||
|
)
|
||||||
|
from text_generation_server.models.flash_santacoder import (
|
||||||
|
FlashSantacoder,
|
||||||
|
FlashSantacoderSharded,
|
||||||
|
)
|
||||||
|
|
||||||
FLASH_ATTENTION = True
|
FLASH_ATTENTION = True
|
||||||
else:
|
else:
|
||||||
FLASH_ATTENTION = False
|
FLASH_ATTENTION = False
|
||||||
|
|
Loading…
Reference in New Issue