2023-04-26 12:23:54 -06:00
|
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
|
|
|
use std::sync::Arc;
|
2024-02-15 02:28:10 -07:00
|
|
|
use text_generation_client::GrammarType as ProtoGrammarType;
|
2023-04-26 12:23:54 -06:00
|
|
|
use text_generation_client::{
|
|
|
|
Batch, NextTokenChooserParameters, Request, ShardedClient, StoppingCriteriaParameters,
|
|
|
|
};
|
|
|
|
|
2023-04-27 11:16:35 -06:00
|
|
|
// Note: Request ids and batch ids cannot collide.
// Both sentinels are u64::MAX on purpose: request ids and batch ids live in
// separate id spaces, and real ids are assumed never to reach u64::MAX, so the
// health-check probe can never clash with in-flight work.
// NOTE(review): confirm against the id allocation in the request queue.
const LIVENESS_ID: u64 = u64::MAX;
const BATCH_ID: u64 = u64::MAX;
|
|
|
|
|
2023-04-26 12:23:54 -06:00
|
|
|
/// Health-check state for the model shards.
#[derive(Clone, Debug)]
pub(crate) struct Health {
    // gRPC client used to reach the model shards.
    client: ShardedClient,
    // Shared flag recording whether the last generation attempt succeeded;
    // `check` reads it to choose between a cheap gRPC ping and a full dummy
    // prefill, and stores the probe outcome back into it.
    // NOTE(review): presumably also updated by the generation path elsewhere
    // in the crate — confirm.
    generation_health: Arc<AtomicBool>,
}
|
|
|
|
|
|
|
|
impl Health {
|
|
|
|
pub(crate) fn new(client: ShardedClient, generation_health: Arc<AtomicBool>) -> Self {
|
|
|
|
Self {
|
|
|
|
client,
|
|
|
|
generation_health,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
pub(crate) async fn check(&mut self) -> bool {
|
|
|
|
if self.generation_health.load(Ordering::SeqCst) {
|
|
|
|
// Generation is healthy, we only check that the shards are answering gRPC calls
|
|
|
|
self.client.health().await.is_ok()
|
|
|
|
} else {
|
|
|
|
// Generation is unhealthy or have not sent any generation request yet
|
|
|
|
|
|
|
|
// Dummy batch of 1 token and 1 generated token
|
|
|
|
let liveness_request = Request {
|
2023-04-27 11:16:35 -06:00
|
|
|
id: LIVENESS_ID,
|
2023-04-26 12:23:54 -06:00
|
|
|
inputs: "liveness".to_string(),
|
|
|
|
truncate: 10,
|
2023-06-02 09:12:30 -06:00
|
|
|
prefill_logprobs: false,
|
2023-04-26 12:23:54 -06:00
|
|
|
parameters: Some(NextTokenChooserParameters {
|
|
|
|
temperature: 1.0,
|
|
|
|
top_k: 0,
|
|
|
|
top_p: 1.0,
|
|
|
|
typical_p: 1.0,
|
|
|
|
do_sample: false,
|
|
|
|
seed: 0,
|
|
|
|
repetition_penalty: 1.0,
|
2024-02-08 10:41:25 -07:00
|
|
|
frequency_penalty: 0.0,
|
2023-04-26 12:23:54 -06:00
|
|
|
watermark: false,
|
2024-02-15 02:28:10 -07:00
|
|
|
grammar: String::new(),
|
|
|
|
grammar_type: ProtoGrammarType::None as i32,
|
2023-04-26 12:23:54 -06:00
|
|
|
}),
|
|
|
|
stopping_parameters: Some(StoppingCriteriaParameters {
|
|
|
|
max_new_tokens: 1,
|
|
|
|
stop_sequences: vec![],
|
|
|
|
ignore_eos_token: false,
|
|
|
|
}),
|
2023-08-28 03:43:47 -06:00
|
|
|
top_n_tokens: 0,
|
2023-04-26 12:23:54 -06:00
|
|
|
};
|
|
|
|
let batch = Batch {
|
2023-04-27 11:16:35 -06:00
|
|
|
id: BATCH_ID,
|
2023-04-26 12:23:54 -06:00
|
|
|
requests: vec![liveness_request],
|
|
|
|
size: 1,
|
|
|
|
max_tokens: 2,
|
|
|
|
};
|
|
|
|
// Skips the queue
|
|
|
|
let value = self.client.prefill(batch).await.is_ok();
|
|
|
|
// Update generation health
|
|
|
|
self.generation_health.store(value, Ordering::SeqCst);
|
|
|
|
value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|