parent
fc9c3153e5
commit
a2a97b05d6
|
@ -153,7 +153,7 @@ this will impact performance.
|
||||||
### Distributed Tracing
|
### Distributed Tracing
|
||||||
|
|
||||||
`text-generation-inference` is instrumented with distributed tracing using OpenTelemetry. You can use this feature
|
`text-generation-inference` is instrumented with distributed tracing using OpenTelemetry. You can use this feature
|
||||||
by setting the address to an OTLP collector with the `--otlp-endpoint` argument. The default service name can be
|
by setting the address to an OTLP collector with the `--otlp-endpoint` argument. The default service name can be
|
||||||
overridden with the `--otlp-service-name` argument.
|
overridden with the `--otlp-service-name` argument.
|
||||||
|
|
||||||
### Architecture
|
### Architecture
|
||||||
|
|
|
@ -147,7 +147,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
tracing::info!("Downloading tokenizer");
|
tracing::info!("Downloading tokenizer");
|
||||||
|
|
||||||
// Parse Huggingface hub token
|
// Parse Huggingface hub token
|
||||||
let auth_token = std::env::var("HF_TOKEN").or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN")).ok();
|
let auth_token = std::env::var("HF_TOKEN")
|
||||||
|
.or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN"))
|
||||||
|
.ok();
|
||||||
|
|
||||||
// Download and instantiate tokenizer
|
// Download and instantiate tokenizer
|
||||||
// We need to download it outside of the Tokio runtime
|
// We need to download it outside of the Tokio runtime
|
||||||
|
|
|
@ -762,7 +762,7 @@ fn num_cuda_devices() -> Option<usize> {
|
||||||
Err(_) => match env::var("NVIDIA_VISIBLE_DEVICES") {
|
Err(_) => match env::var("NVIDIA_VISIBLE_DEVICES") {
|
||||||
Ok(devices) => devices,
|
Ok(devices) => devices,
|
||||||
Err(_) => env::var("ZE_AFFINITY_MASK").ok()?,
|
Err(_) => env::var("ZE_AFFINITY_MASK").ok()?,
|
||||||
}
|
},
|
||||||
};
|
};
|
||||||
let n_devices = devices.split(',').count();
|
let n_devices = devices.split(',').count();
|
||||||
Some(n_devices)
|
Some(n_devices)
|
||||||
|
@ -1225,7 +1225,6 @@ fn spawn_webserver(
|
||||||
router_args.push("--otlp-service-name".to_string());
|
router_args.push("--otlp-service-name".to_string());
|
||||||
router_args.push(otlp_service_name);
|
router_args.push(otlp_service_name);
|
||||||
|
|
||||||
|
|
||||||
// CORS origins
|
// CORS origins
|
||||||
for origin in args.cors_allow_origin.into_iter() {
|
for origin in args.cors_allow_origin.into_iter() {
|
||||||
router_args.push("--cors-allow-origin".to_string());
|
router_args.push("--cors-allow-origin".to_string());
|
||||||
|
|
|
@ -159,7 +159,9 @@ async fn main() -> Result<(), RouterError> {
|
||||||
});
|
});
|
||||||
|
|
||||||
// Parse Huggingface hub token
|
// Parse Huggingface hub token
|
||||||
let authorization_token = std::env::var("HF_TOKEN").or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN")).ok();
|
let authorization_token = std::env::var("HF_TOKEN")
|
||||||
|
.or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN"))
|
||||||
|
.ok();
|
||||||
|
|
||||||
// Tokenizer instance
|
// Tokenizer instance
|
||||||
// This will only be used to validate payloads
|
// This will only be used to validate payloads
|
||||||
|
|
Loading…
Reference in New Issue