feat(router): Add const parameters to validation logic (#15)
I noticed an opportunity to collapse some of the validation logic, in case you are interested.
parent 3efa5bbbfd
commit 60472f9d2b
@@ -71,12 +71,8 @@ impl State {
                 id: *id,
                 inputs: entry.request.inputs.clone(),
                 input_length: entry.input_length as u32,
-                parameters: Some(NextTokenChooserParameters::from(
-                    entry.request.parameters.clone(),
-                )),
-                stopping_parameters: Some(StoppingCriteriaParameters::from(
-                    entry.request.parameters.clone(),
-                )),
+                parameters: Some((&entry.request.parameters).into()),
+                stopping_parameters: Some(entry.request.parameters.clone().into()),
             });

             ids.push(*id);
@@ -162,8 +158,8 @@ impl Db {
     }
 }

-impl From<GenerateParameters> for NextTokenChooserParameters {
-    fn from(parameters: GenerateParameters) -> Self {
+impl From<&GenerateParameters> for NextTokenChooserParameters {
+    fn from(parameters: &GenerateParameters) -> Self {
         Self {
             temperature: parameters.temperature,
             top_k: parameters.top_k as u32,
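Taking the parameters by reference is what lets the first hunk above collapse to `(&entry.request.parameters).into()` without an intermediate clone. A minimal sketch of the pattern, using simplified stand-in types rather than the real router/protobuf definitions:

```rust
// Hypothetical stand-ins for the real GenerateParameters /
// NextTokenChooserParameters types; only the shape matters here.
#[derive(Clone)]
struct GenerateParameters {
    temperature: f32,
    top_k: i32,
}

struct NextTokenChooserParameters {
    temperature: f32,
    top_k: u32,
}

// By-reference From impl: the conversion reads the fields, no clone required.
impl From<&GenerateParameters> for NextTokenChooserParameters {
    fn from(parameters: &GenerateParameters) -> Self {
        Self {
            temperature: parameters.temperature,
            top_k: parameters.top_k as u32,
        }
    }
}

fn main() {
    let params = GenerateParameters { temperature: 0.7, top_k: 10 };
    // `.into()` resolves through the impl above; `params` stays usable.
    let chooser: NextTokenChooserParameters = (&params).into();
    assert_eq!(chooser.top_k, 10);
    assert_eq!(params.top_k, 10);
}
```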
@@ -4,12 +4,11 @@ use axum::http::StatusCode;
 use axum::Json;
 use thiserror::Error;
 use tokenizers::tokenizer::Tokenizer;
-use tokenizers::{
-    DecoderWrapper, ModelWrapper, NormalizerWrapper, PostProcessorWrapper, PreTokenizerWrapper,
-    TokenizerImpl,
-};
 use tokio::sync::{mpsc, oneshot};

+const MAX_MAX_NEW_TOKENS: u32 = 512;
+const MAX_STOP_SEQUENCES: usize = 4;
+
 /// Validation
 #[derive(Debug, Clone)]
 pub struct Validation {
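Naming the limits as consts means the checks in `validate` and the rendered error messages can no longer drift apart; 512 and 4 now each live in exactly one place. A small sketch of the idea (simplified, not the router code):

```rust
const MAX_MAX_NEW_TOKENS: u32 = 512;

// The same const drives both the check and the message.
fn check_max_new_tokens(max_new_tokens: u32) -> Result<(), String> {
    if max_new_tokens > MAX_MAX_NEW_TOKENS {
        return Err(format!(
            "max_new_tokens must be <= {}",
            MAX_MAX_NEW_TOKENS
        ));
    }
    Ok(())
}

fn main() {
    assert!(check_max_new_tokens(512).is_ok());
    assert!(check_max_new_tokens(513).is_err());
}
```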
@@ -63,7 +62,7 @@ async fn validation_task(

     // Create workers
     for _ in 0..workers {
-        let tokenizer_clone = tokenizer.clone();
+        let tokenizer_clone: Tokenizer = tokenizer.clone().into();
         // Create channel to communicate with worker
         let (worker_sender, worker_receiver) = mpsc::channel(workers);
         workers_senders.push(worker_sender);
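The explicit `Tokenizer` annotation drives the `.into()` conversion from the concrete `TokenizerImpl<...>` the task holds, which is what lets `validation_worker` below drop the five wrapper type parameters from its signature; the actual conversion comes from the `tokenizers` crate's own `From` impl, as this diff relies on. A toy sketch of the newtype/`From` mechanics (hypothetical `Inner`/`Wrapper` names):

```rust
// Hypothetical types standing in for TokenizerImpl<...> and Tokenizer.
struct Inner {
    vocab_size: usize,
}

struct Wrapper(Inner);

impl From<Inner> for Wrapper {
    fn from(inner: Inner) -> Self {
        Wrapper(inner)
    }
}

fn main() {
    let inner = Inner { vocab_size: 32_000 };
    // The annotated binding selects the target type for `.into()`, mirroring
    // `let tokenizer_clone: Tokenizer = tokenizer.clone().into();`
    let wrapped: Wrapper = inner.into();
    assert_eq!(wrapped.0.vocab_size, 32_000);
}
```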
@@ -89,68 +88,54 @@ async fn validation_task(
 /// Check the parameters inside the payload and get the number of tokens inside the input using
 /// the tokenizer
 fn validation_worker(
-    tokenizer: TokenizerImpl<
-        ModelWrapper,
-        NormalizerWrapper,
-        PreTokenizerWrapper,
-        PostProcessorWrapper,
-        DecoderWrapper,
-    >,
+    tokenizer: Tokenizer,
     max_input_length: usize,
     mut receiver: mpsc::Receiver<ValidationRequest>,
 ) {
     // Loop over requests
     while let Some((request, response_tx)) = receiver.blocking_recv() {
-        if request.parameters.temperature <= 0.0 {
-            response_tx
-                .send(Err(ValidationError::Temperature))
-                .unwrap_or(());
-            continue;
-        }
-        if request.parameters.top_p <= 0.0 || request.parameters.top_p > 1.0 {
-            response_tx.send(Err(ValidationError::TopP)).unwrap_or(());
-            continue;
-        }
-        if request.parameters.top_k < 0 {
-            response_tx.send(Err(ValidationError::TopK)).unwrap_or(());
-            continue;
-        }
-        if request.parameters.max_new_tokens > 512 {
-            response_tx
-                .send(Err(ValidationError::MaxNewTokens))
-                .unwrap_or(());
-            continue;
-        }
-        if request.parameters.stop.len() > 4 {
-            response_tx
-                .send(Err(ValidationError::StopSequence(
-                    request.parameters.stop.len(),
-                )))
-                .unwrap_or(());
-            continue;
-        }
-
-        // Get the number of tokens in the input
-        match tokenizer.encode(request.inputs.clone(), true) {
-            Ok(inputs) => {
-                let input_length = inputs.len();
-
-                if input_length > max_input_length {
-                    response_tx
-                        .send(Err(ValidationError::InputLength(
-                            input_length,
-                            max_input_length,
-                        )))
-                        .unwrap_or(());
-                    continue;
-                }
-
-                response_tx.send(Ok((input_length, request))).unwrap_or(());
-            }
-            Err(err) => response_tx
-                .send(Err(ValidationError::Tokenizer(err.to_string())))
-                .unwrap_or(()),
-        };
+        response_tx.send(validate(request, &tokenizer, max_input_length)).unwrap_or(())
+    }
+}
+
+fn validate(
+    request: GenerateRequest,
+    tokenizer: &Tokenizer,
+    max_input_length: usize,
+) -> Result<(usize, GenerateRequest), ValidationError> {
+    if request.parameters.temperature <= 0.0 {
+        return Err(ValidationError::Temperature);
+    }
+    if request.parameters.top_p <= 0.0 || request.parameters.top_p > 1.0 {
+        return Err(ValidationError::TopP);
+    }
+    if request.parameters.top_k < 0 {
+        return Err(ValidationError::TopK);
+    }
+    if request.parameters.max_new_tokens > MAX_MAX_NEW_TOKENS {
+        return Err(ValidationError::MaxNewTokens(MAX_MAX_NEW_TOKENS));
+    }
+    if request.parameters.stop.len() > MAX_STOP_SEQUENCES {
+        return Err(ValidationError::StopSequence(
+            MAX_STOP_SEQUENCES,
+            request.parameters.stop.len(),
+        ));
+    }
+
+    // Get the number of tokens in the input
+    match tokenizer.encode(request.inputs.clone(), true) {
+        Ok(inputs) => {
+            let input_length = inputs.len();
+
+            if input_length > max_input_length {
+                Err(ValidationError::InputLength(
+                    input_length,
+                    max_input_length,
+                ))
+            } else {
+                Ok((input_length, request))
+            }
+        },
+        Err(err) => Err(ValidationError::Tokenizer(err.to_string())),
     }
 }
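With the checks moved into `validate` and a `Result` return type, every `response_tx.send(Err(..)).unwrap_or(()); continue;` block becomes a plain early return, and the worker loop forwards whatever `validate` produces in a single `send`. A condensed, self-contained sketch of that shape (stand-in types, std `mpsc` instead of tokio):

```rust
use std::sync::mpsc;

// Simplified stand-ins for GenerateRequest and ValidationError.
#[derive(Debug)]
struct Request {
    temperature: f32,
    max_new_tokens: u32,
}

#[derive(Debug)]
enum ValidationError {
    Temperature,
    MaxNewTokens(u32),
}

const MAX_MAX_NEW_TOKENS: u32 = 512;

// All checks in one place; each failure is an early return.
fn validate(request: Request) -> Result<Request, ValidationError> {
    if request.temperature <= 0.0 {
        return Err(ValidationError::Temperature);
    }
    if request.max_new_tokens > MAX_MAX_NEW_TOKENS {
        return Err(ValidationError::MaxNewTokens(MAX_MAX_NEW_TOKENS));
    }
    Ok(request)
}

fn main() {
    let (tx, rx) = mpsc::channel();
    tx.send(Request { temperature: 0.0, max_new_tokens: 10 }).unwrap();
    tx.send(Request { temperature: 0.7, max_new_tokens: 10 }).unwrap();
    drop(tx);
    // The worker loop collapses to a single send per request.
    while let Ok(request) = rx.recv() {
        println!("{:?}", validate(request));
    }
}
```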
@@ -167,12 +152,12 @@ pub enum ValidationError {
     TopP,
     #[error("top_k must be strictly positive")]
     TopK,
-    #[error("max_new_tokens must be <= 512")]
-    MaxNewTokens,
+    #[error("max_new_tokens must be <= {0}")]
+    MaxNewTokens(u32),
     #[error("inputs must have less than {1} tokens. Given: {0}")]
     InputLength(usize, usize),
-    #[error("stop supports up to 4 stop sequences. Given: {0}")]
-    StopSequence(usize),
+    #[error("stop supports up to {0} stop sequences. Given: {1}")]
+    StopSequence(usize, usize),
     #[error("tokenizer error {0}")]
     Tokenizer(String),
 }
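Since the limits are now carried in the variants, `thiserror` interpolates them into the `Display` output, keeping the messages in lockstep with the consts. A quick check of how the new variants render (assumes `thiserror` as a dependency, which the file already imports):

```rust
use thiserror::Error;

#[derive(Debug, Error)]
enum ValidationError {
    #[error("max_new_tokens must be <= {0}")]
    MaxNewTokens(u32),
    #[error("stop supports up to {0} stop sequences. Given: {1}")]
    StopSequence(usize, usize),
}

fn main() {
    // The rendered messages pick the limits up from the variant payloads.
    assert_eq!(
        ValidationError::MaxNewTokens(512).to_string(),
        "max_new_tokens must be <= 512"
    );
    assert_eq!(
        ValidationError::StopSequence(4, 7).to_string(),
        "stop supports up to 4 stop sequences. Given: 7"
    );
}
```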